Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 14 Dec 2016 21:31:29 +0000 (13:31 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 14 Dec 2016 21:31:29 +0000 (13:31 -0800)
Pull crypto updates from Herbert Xu:
 "Here is the crypto update for 4.10:

  API:
   - add skcipher walk interface
   - add asynchronous compression (acomp) interface
   - fix algif_aead AIO handling of zero buffer

  Algorithms:
   - fix unaligned access in poly1305
   - fix DRBG output to large buffers

  Drivers:
   - add support for iMX6UL to caam
   - fix givenc descriptors (used by IPsec) in caam
   - accelerated SHA256/SHA512 for ARM64 from OpenSSL
   - add SSE CRCT10DIF and CRC32 to ARM/ARM64
   - add AEAD support to Chelsio chcr
   - add Armada 8K support to omap-rng"

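As a rough sketch of the new acomp interface noted above (names taken from include/crypto/acompress.h, which this pull adds; the "deflate" algorithm choice, the function name and the error handling are illustrative assumptions, not part of the series):

        #include <crypto/acompress.h>
        #include <linux/err.h>
        #include <linux/scatterlist.h>

        /* Illustrative only: compress src into dst via the acomp API. */
        static int example_acomp_compress(struct scatterlist *src, unsigned int slen,
                                          struct scatterlist *dst, unsigned int dlen)
        {
                struct crypto_acomp *tfm;
                struct acomp_req *req;
                int err;

                tfm = crypto_alloc_acomp("deflate", 0, 0);      /* assumed algorithm */
                if (IS_ERR(tfm))
                        return PTR_ERR(tfm);

                req = acomp_request_alloc(tfm);
                if (!req) {
                        crypto_free_acomp(tfm);
                        return -ENOMEM;
                }

                /* A real caller would also set a completion callback for async use. */
                acomp_request_set_params(req, src, dst, slen, dlen);
                err = crypto_acomp_compress(req);

                acomp_request_free(req);
                crypto_free_acomp(tfm);
                return err;
        }
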
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (148 commits)
  crypto: testmgr - fix overlap in chunked tests again
  crypto: arm/crc32 - accelerated support based on x86 SSE implementation
  crypto: arm64/crc32 - accelerated support based on x86 SSE implementation
  crypto: arm/crct10dif - port x86 SSE implementation to ARM
  crypto: arm64/crct10dif - port x86 SSE implementation to arm64
  crypto: testmgr - add/enhance test cases for CRC-T10DIF
  crypto: testmgr - avoid overlap in chunked tests
  crypto: chcr - checking for IS_ERR() instead of NULL
  crypto: caam - check caam_emi_slow instead of re-lookup platform
  crypto: algif_aead - fix AIO handling of zero buffer
  crypto: aes-ce - Make aes_simd_algs static
  crypto: algif_skcipher - set error code when kcalloc fails
  crypto: caam - make aamalg_desc a proper module
  crypto: caam - pass key buffers with typesafe pointers
  crypto: arm64/aes-ce-ccm - Fix AEAD decryption length
  MAINTAINERS: add crypto headers to crypto entry
  crypt: doc - remove misleading mention of async API
  crypto: doc - fix header file name
  crypto: api - fix comment typo
  crypto: skcipher - Add separate walker for AEAD decryption
  ..

151 files changed:
Documentation/crypto/api-intro.txt
Documentation/devicetree/bindings/crypto/fsl-sec4.txt
Documentation/devicetree/bindings/rng/omap_rng.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
MAINTAINERS
arch/arm/crypto/Kconfig
arch/arm/crypto/Makefile
arch/arm/crypto/aes-ce-glue.c
arch/arm/crypto/aesbs-glue.c
arch/arm/crypto/crc32-ce-core.S [new file with mode: 0644]
arch/arm/crypto/crc32-ce-glue.c [new file with mode: 0644]
arch/arm/crypto/crct10dif-ce-core.S [new file with mode: 0644]
arch/arm/crypto/crct10dif-ce-glue.c [new file with mode: 0644]
arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
arch/arm64/crypto/.gitignore [new file with mode: 0644]
arch/arm64/crypto/Kconfig
arch/arm64/crypto/Makefile
arch/arm64/crypto/aes-ce-ccm-core.S
arch/arm64/crypto/aes-ce-ccm-glue.c
arch/arm64/crypto/aes-ce-cipher.c
arch/arm64/crypto/aes-ce.S
arch/arm64/crypto/aes-glue.c
arch/arm64/crypto/aes-modes.S
arch/arm64/crypto/aes-neon.S
arch/arm64/crypto/crc32-ce-core.S [new file with mode: 0644]
arch/arm64/crypto/crc32-ce-glue.c [new file with mode: 0644]
arch/arm64/crypto/crct10dif-ce-core.S [new file with mode: 0644]
arch/arm64/crypto/crct10dif-ce-glue.c [new file with mode: 0644]
arch/arm64/crypto/ghash-ce-core.S
arch/arm64/crypto/sha1-ce-core.S
arch/arm64/crypto/sha2-ce-core.S
arch/arm64/crypto/sha256-core.S_shipped [new file with mode: 0644]
arch/arm64/crypto/sha256-glue.c [new file with mode: 0644]
arch/arm64/crypto/sha512-armv8.pl [new file with mode: 0644]
arch/arm64/crypto/sha512-core.S_shipped [new file with mode: 0644]
arch/arm64/crypto/sha512-glue.c [new file with mode: 0644]
arch/powerpc/crypto/Makefile
arch/x86/crypto/aesni-intel_glue.c
arch/x86/crypto/fpu.c
arch/x86/crypto/glue_helper.c
arch/x86/crypto/sha1-mb/sha1_mb.c
arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
arch/x86/crypto/sha256-mb/sha256_mb.c
arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
arch/x86/crypto/sha512-mb/sha512_mb.c
arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
arch/x86/include/asm/crypto/glue_helper.h
crypto/842.c
crypto/Kconfig
crypto/Makefile
crypto/acompress.c [new file with mode: 0644]
crypto/algboss.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/api.c
crypto/authenc.c
crypto/authencesn.c
crypto/cbc.c
crypto/ccm.c
crypto/chacha20poly1305.c
crypto/cipher.c
crypto/cmac.c
crypto/compress.c
crypto/cryptd.c
crypto/crypto_engine.c
crypto/crypto_user.c
crypto/ctr.c
crypto/cts.c
crypto/deflate.c
crypto/dh.c
crypto/drbg.c
crypto/gcm.c
crypto/gf128mul.c
crypto/internal.h
crypto/jitterentropy-kcapi.c
crypto/lrw.c
crypto/lz4.c
crypto/lz4hc.c
crypto/lzo.c
crypto/pcbc.c
crypto/poly1305_generic.c
crypto/scompress.c [new file with mode: 0644]
crypto/simd.c [new file with mode: 0644]
crypto/skcipher.c
crypto/testmgr.c
crypto/testmgr.h
crypto/xts.c
drivers/char/hw_random/Kconfig
drivers/char/hw_random/atmel-rng.c
drivers/char/hw_random/core.c
drivers/char/hw_random/meson-rng.c
drivers/char/hw_random/msm-rng.c
drivers/char/hw_random/omap-rng.c
drivers/char/hw_random/pic32-rng.c
drivers/char/hw_random/pseries-rng.c
drivers/crypto/amcc/crypto4xx_core.c
drivers/crypto/atmel-aes-regs.h
drivers/crypto/atmel-aes.c
drivers/crypto/caam/Kconfig
drivers/crypto/caam/Makefile
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/caamalg_desc.c [new file with mode: 0644]
drivers/crypto/caam/caamalg_desc.h [new file with mode: 0644]
drivers/crypto/caam/caamhash.c
drivers/crypto/caam/caampkc.c
drivers/crypto/caam/caamrng.c
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/desc.h
drivers/crypto/caam/desc_constr.h
drivers/crypto/caam/error.c
drivers/crypto/caam/intern.h
drivers/crypto/caam/jr.c
drivers/crypto/caam/key_gen.c
drivers/crypto/caam/key_gen.h
drivers/crypto/caam/sg_sw_sec4.h
drivers/crypto/ccp/ccp-dev-v3.c
drivers/crypto/ccp/ccp-dev-v5.c
drivers/crypto/ccp/ccp-dev.c
drivers/crypto/ccp/ccp-dev.h
drivers/crypto/chelsio/Kconfig
drivers/crypto/chelsio/chcr_algo.c
drivers/crypto/chelsio/chcr_algo.h
drivers/crypto/chelsio/chcr_core.c
drivers/crypto/chelsio/chcr_core.h
drivers/crypto/chelsio/chcr_crypto.h
drivers/crypto/marvell/cesa.c
drivers/crypto/marvell/cesa.h
drivers/crypto/marvell/cipher.c
drivers/crypto/marvell/hash.c
drivers/crypto/marvell/tdma.c
drivers/crypto/mv_cesa.c
drivers/crypto/nx/nx.c
drivers/crypto/sahara.c
drivers/crypto/talitos.c
drivers/crypto/vmx/Makefile
include/crypto/acompress.h [new file with mode: 0644]
include/crypto/cbc.h [new file with mode: 0644]
include/crypto/cryptd.h
include/crypto/engine.h
include/crypto/gf128mul.h
include/crypto/internal/acompress.h [new file with mode: 0644]
include/crypto/internal/scompress.h [new file with mode: 0644]
include/crypto/internal/simd.h [new file with mode: 0644]
include/crypto/internal/skcipher.h
include/crypto/xts.h
include/linux/ccp.h
include/linux/crypto.h
include/linux/hw_random.h
include/uapi/linux/cryptouser.h
kernel/padata.c

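The skcipher walk interface mentioned in the pull text is what the ARM AES glue conversions below are built on. The common pattern, extracted from those diffs as a sketch (the per-block cipher call itself is elided; the function and context names are placeholders):

        #include <crypto/aes.h>
        #include <crypto/internal/skcipher.h>

        /* Sketch of the walk pattern used by the converted skcipher drivers below. */
        static int example_skcipher_crypt(struct skcipher_request *req)
        {
                struct skcipher_walk walk;
                unsigned int blocks;
                int err;

                /* Third argument 'atomic' == true: no sleeping between walk steps. */
                err = skcipher_walk_virt(&walk, req, true);

                while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
                        /*
                         * Process 'blocks' full blocks from walk.src.virt.addr to
                         * walk.dst.virt.addr (walk.iv holds the IV), then report
                         * how many bytes were left unprocessed.
                         */
                        err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
                }
                return err;
        }
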
diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt
index beda682..45d943f 100644 (file)
@@ -44,12 +44,9 @@ one block while the former can operate on an arbitrary amount of data,
 subject to block size requirements (i.e., non-stream ciphers can only
 process multiples of blocks).
 
-Support for hardware crypto devices via an asynchronous interface is
-under development.
-
 Here's an example of how to use the API:
 
-       #include <crypto/ahash.h>
+       #include <crypto/hash.h>
        #include <linux/err.h>
        #include <linux/scatterlist.h>
        
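(The hunk above shows only the corrected include lines; the body of the documentation's example lies outside this diff. For orientation, a minimal synchronous-hash use of the same <crypto/hash.h> header could look like the sketch below. This is an editor's illustration against the 4.10-era API, not text from api-intro.txt, and "sha1" is an arbitrary choice.)

        #include <crypto/hash.h>
        #include <linux/err.h>

        static int example_sha1_digest(const u8 *data, unsigned int len, u8 *out)
        {
                struct crypto_shash *tfm;
                int err;

                tfm = crypto_alloc_shash("sha1", 0, 0);
                if (IS_ERR(tfm))
                        return PTR_ERR(tfm);

                {
                        /* shash_desc still has a 'flags' field in this era. */
                        SHASH_DESC_ON_STACK(desc, tfm);

                        desc->tfm = tfm;
                        desc->flags = 0;
                        err = crypto_shash_digest(desc, data, len, out);
                }

                crypto_free_shash(tfm);
                return err;
        }
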
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
index adeca34..10a425f 100644 (file)
@@ -123,6 +123,9 @@ PROPERTIES
 
 
 EXAMPLE
+
+iMX6QDL/SX requires four clocks
+
        crypto@300000 {
                compatible = "fsl,sec-v4.0";
                fsl,sec-era = <2>;
@@ -139,6 +142,23 @@ EXAMPLE
                clock-names = "mem", "aclk", "ipg", "emi_slow";
        };
 
+
+iMX6UL requires only three clocks
+
+       crypto: caam@2140000 {
+               compatible = "fsl,sec-v4.0";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               reg = <0x2140000 0x3c000>;
+               ranges = <0 0x2140000 0x3c000>;
+               interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+
+               clocks = <&clks IMX6UL_CLK_CAAM_MEM>,
+                        <&clks IMX6UL_CLK_CAAM_ACLK>,
+                        <&clks IMX6UL_CLK_CAAM_IPG>;
+               clock-names = "mem", "aclk", "ipg";
+       };
+
 =====================================================================
 Job Ring (JR) Node
 
diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt
index 6a62acd..4714772 100644 (file)
@@ -1,4 +1,4 @@
-OMAP SoC HWRNG Module
+OMAP SoC and Inside-Secure HWRNG Module
 
 Required properties:
 
@@ -6,11 +6,13 @@ Required properties:
   RNG versions:
   - "ti,omap2-rng" for OMAP2.
   - "ti,omap4-rng" for OMAP4, OMAP5 and AM33XX.
+  - "inside-secure,safexcel-eip76" for SoCs with EIP76 IP block
   Note that these two versions are incompatible.
 - ti,hwmods: Name of the hwmod associated with the RNG module
 - reg : Offset and length of the register set for the module
 - interrupts : the interrupt number for the RNG module.
-               Only used for "ti,omap4-rng".
+               Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
+- clocks: the trng clock source
 
 Example:
 /* AM335x */
@@ -20,3 +22,11 @@ rng: rng@48310000 {
        reg = <0x48310000 0x2000>;
        interrupts = <111>;
 };
+
+/* SafeXcel IP-76 */
+trng: rng@f2760000 {
+       compatible = "inside-secure,safexcel-eip76";
+       reg = <0xf2760000 0x7d>;
+       interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+       clocks = <&cpm_syscon0 1 25>;
+};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 9837175..078834a 100644 (file)
@@ -137,6 +137,7 @@ infineon Infineon Technologies
 inforce        Inforce Computing
 ingenic        Ingenic Semiconductor
 innolux        Innolux Corporation
+inside-secure  INSIDE Secure
 intel  Intel Corporation
 intercontrol   Inter Control Group
 invensense     InvenSense Inc.
diff --git a/MAINTAINERS b/MAINTAINERS
index 60a01bd..59c9895 100644 (file)
@@ -3470,6 +3470,7 @@ F:        arch/*/crypto/
 F:     crypto/
 F:     drivers/crypto/
 F:     include/crypto/
+F:     include/linux/crypto*
 
 CRYPTOGRAPHIC RANDOM NUMBER GENERATOR
 M:     Neil Horman <nhorman@tuxdriver.com>
@@ -5086,6 +5087,14 @@ F:       include/linux/fb.h
 F:     include/uapi/video/
 F:     include/uapi/linux/fb.h
 
+FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
+M:     Horia Geantă <horia.geanta@nxp.com>
+M:     Dan Douglass <dan.douglass@nxp.com>
+L:     linux-crypto@vger.kernel.org
+S:     Maintained
+F:     drivers/crypto/caam/
+F:     Documentation/devicetree/bindings/crypto/fsl-sec4.txt
+
 FREESCALE DIU FRAMEBUFFER DRIVER
 M:     Timur Tabi <timur@tabi.org>
 L:     linux-fbdev@vger.kernel.org
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 27ed1b1..13f1b4c 100644 (file)
@@ -88,9 +88,9 @@ config CRYPTO_AES_ARM
 config CRYPTO_AES_ARM_BS
        tristate "Bit sliced AES using NEON instructions"
        depends on KERNEL_MODE_NEON
-       select CRYPTO_ALGAPI
        select CRYPTO_AES_ARM
-       select CRYPTO_ABLK_HELPER
+       select CRYPTO_BLKCIPHER
+       select CRYPTO_SIMD
        help
          Use a faster and more secure NEON based implementation of AES in CBC,
          CTR and XTS modes
@@ -104,8 +104,8 @@ config CRYPTO_AES_ARM_BS
 config CRYPTO_AES_ARM_CE
        tristate "Accelerated AES using ARMv8 Crypto Extensions"
        depends on KERNEL_MODE_NEON
-       select CRYPTO_ALGAPI
-       select CRYPTO_ABLK_HELPER
+       select CRYPTO_BLKCIPHER
+       select CRYPTO_SIMD
        help
          Use an implementation of AES in CBC, CTR and XTS modes that uses
          ARMv8 Crypto Extensions
@@ -120,4 +120,14 @@ config CRYPTO_GHASH_ARM_CE
          that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
          that is part of the ARMv8 Crypto Extensions
 
+config CRYPTO_CRCT10DIF_ARM_CE
+       tristate "CRCT10DIF digest algorithm using PMULL instructions"
+       depends on KERNEL_MODE_NEON && CRC_T10DIF
+       select CRYPTO_HASH
+
+config CRYPTO_CRC32_ARM_CE
+       tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
+       depends on KERNEL_MODE_NEON && CRC32
+       select CRYPTO_HASH
+
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index fc51507..b578a18 100644 (file)
@@ -13,6 +13,8 @@ ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
 
 ifneq ($(ce-obj-y)$(ce-obj-m),)
 ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
@@ -36,6 +38,8 @@ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
 sha2-arm-ce-y  := sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y   := aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
+crct10dif-arm-ce-y     := crct10dif-ce-core.o crct10dif-ce-glue.o
+crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index aef022a..8857531 100644 (file)
@@ -12,8 +12,8 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -88,8 +88,13 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
                u32 *rki = ctx->key_enc + (i * kwords);
                u32 *rko = rki + kwords;
 
+#ifndef CONFIG_CPU_BIG_ENDIAN
                rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
                rko[0] = rko[0] ^ rki[0] ^ rcon[i];
+#else
+               rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
+               rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
+#endif
                rko[1] = rko[0] ^ rki[1];
                rko[2] = rko[1] ^ rki[2];
                rko[3] = rko[2] ^ rki[3];
@@ -128,17 +133,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
        return 0;
 }
 
-static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
                         unsigned int key_len)
 {
-       struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
        int ret;
 
        ret = ce_aes_expandkey(ctx, in_key, key_len);
        if (!ret)
                return 0;
 
-       tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+       crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
        return -EINVAL;
 }
 
@@ -147,13 +152,13 @@ struct crypto_aes_xts_ctx {
        struct crypto_aes_ctx __aligned(8) key2;
 };
 
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
                       unsigned int key_len)
 {
-       struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int ret;
 
-       ret = xts_check_key(tfm, in_key, key_len);
+       ret = xts_verify_key(tfm, in_key, key_len);
        if (ret)
                return ret;
 
@@ -164,130 +169,113 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
        if (!ret)
                return 0;
 
-       tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+       crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
        return -EINVAL;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        unsigned int blocks;
        int err;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
                ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        unsigned int blocks;
        int err;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
                ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        unsigned int blocks;
        int err;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
                ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
                                   walk.iv);
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        unsigned int blocks;
        int err;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
                ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
                                   walk.iv);
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int ctr_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        int err, blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
                ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
                                   walk.iv);
-               nbytes -= blocks * AES_BLOCK_SIZE;
-               if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
-                       break;
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
-       if (walk.nbytes % AES_BLOCK_SIZE) {
-               u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-               u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+       if (walk.nbytes) {
                u8 __aligned(8) tail[AES_BLOCK_SIZE];
+               unsigned int nbytes = walk.nbytes;
+               u8 *tdst = walk.dst.virt.addr;
+               u8 *tsrc = walk.src.virt.addr;
 
                /*
                 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -298,231 +286,172 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
                                   num_rounds(ctx), blocks, walk.iv);
                memcpy(tdst, tail, nbytes);
-               err = blkcipher_walk_done(desc, &walk, 0);
+               err = skcipher_walk_done(&walk, 0);
        }
        kernel_neon_end();
 
        return err;
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = num_rounds(&ctx->key1);
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key1.key_enc, rounds, blocks,
                                   walk.iv, (u8 *)ctx->key2.key_enc, first);
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
 
        return err;
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = num_rounds(&ctx->key1);
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   (u8 *)ctx->key1.key_dec, rounds, blocks,
                                   walk.iv, (u8 *)ctx->key2.key_enc, first);
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
 
        return err;
 }
 
-static struct crypto_alg aes_algs[] = { {
-       .cra_name               = "__ecb-aes-ce",
-       .cra_driver_name        = "__driver-ecb-aes-ce",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = 0,
-               .setkey         = ce_aes_setkey,
-               .encrypt        = ecb_encrypt,
-               .decrypt        = ecb_decrypt,
+static struct skcipher_alg aes_algs[] = { {
+       .base = {
+               .cra_name               = "__ecb(aes)",
+               .cra_driver_name        = "__ecb-aes-ce",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .setkey         = ce_aes_setkey,
+       .encrypt        = ecb_encrypt,
+       .decrypt        = ecb_decrypt,
 }, {
-       .cra_name               = "__cbc-aes-ce",
-       .cra_driver_name        = "__driver-cbc-aes-ce",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ce_aes_setkey,
-               .encrypt        = cbc_encrypt,
-               .decrypt        = cbc_decrypt,
+       .base = {
+               .cra_name               = "__cbc(aes)",
+               .cra_driver_name        = "__cbc-aes-ce",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .setkey         = ce_aes_setkey,
+       .encrypt        = cbc_encrypt,
+       .decrypt        = cbc_decrypt,
 }, {
-       .cra_name               = "__ctr-aes-ce",
-       .cra_driver_name        = "__driver-ctr-aes-ce",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ce_aes_setkey,
-               .encrypt        = ctr_encrypt,
-               .decrypt        = ctr_encrypt,
+       .base = {
+               .cra_name               = "__ctr(aes)",
+               .cra_driver_name        = "__ctr-aes-ce",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = 1,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .chunksize      = AES_BLOCK_SIZE,
+       .setkey         = ce_aes_setkey,
+       .encrypt        = ctr_encrypt,
+       .decrypt        = ctr_encrypt,
 }, {
-       .cra_name               = "__xts-aes-ce",
-       .cra_driver_name        = "__driver-xts-aes-ce",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_xts_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = xts_set_key,
-               .encrypt        = xts_encrypt,
-               .decrypt        = xts_decrypt,
+       .base = {
+               .cra_name               = "__xts(aes)",
+               .cra_driver_name        = "__xts-aes-ce",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct crypto_aes_xts_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
-}, {
-       .cra_name               = "ecb(aes)",
-       .cra_driver_name        = "ecb-aes-ce",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = 0,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "cbc(aes)",
-       .cra_driver_name        = "cbc-aes-ce",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "ctr(aes)",
-       .cra_driver_name        = "ctr-aes-ce",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "xts(aes)",
-       .cra_driver_name        = "xts-aes-ce",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
+       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .setkey         = xts_set_key,
+       .encrypt        = xts_encrypt,
+       .decrypt        = xts_decrypt,
 } };
 
+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
+               simd_skcipher_free(aes_simd_algs[i]);
+
+       crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
 static int __init aes_init(void)
 {
+       struct simd_skcipher_alg *simd;
+       const char *basename;
+       const char *algname;
+       const char *drvname;
+       int err;
+       int i;
+
        if (!(elf_hwcap2 & HWCAP2_AES))
                return -ENODEV;
-       return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
-}
 
-static void __exit aes_exit(void)
-{
-       crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+       err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+       if (err)
+               return err;
+
+       for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+               algname = aes_algs[i].base.cra_name + 2;
+               drvname = aes_algs[i].base.cra_driver_name + 2;
+               basename = aes_algs[i].base.cra_driver_name;
+               simd = simd_skcipher_create_compat(algname, drvname, basename);
+               err = PTR_ERR(simd);
+               if (IS_ERR(simd))
+                       goto unregister_simds;
+
+               aes_simd_algs[i] = simd;
+       }
+
+       return 0;
+
+unregister_simds:
+       aes_exit();
+       return err;
 }
 
 module_init(aes_init);
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 0511a6c..d8e06de 100644 (file)
@@ -10,8 +10,9 @@
 
 #include <asm/neon.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/cbc.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -55,14 +56,14 @@ struct aesbs_xts_ctx {
        struct AES_KEY  twkey;
 };
 
-static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
                             unsigned int key_len)
 {
-       struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        int bits = key_len * 8;
 
        if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
                return -EINVAL;
        }
        ctx->dec.rk = ctx->enc;
@@ -71,33 +72,33 @@ static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
        return 0;
 }
 
-static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
                             unsigned int key_len)
 {
-       struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
        int bits = key_len * 8;
 
        if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
                return -EINVAL;
        }
        ctx->enc.converted = 0;
        return 0;
 }
 
-static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
                             unsigned int key_len)
 {
-       struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int bits = key_len * 4;
        int err;
 
-       err = xts_check_key(tfm, in_key, key_len);
+       err = xts_verify_key(tfm, in_key, key_len);
        if (err)
                return err;
 
        if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
-               tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+               crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
                return -EINVAL;
        }
        ctx->dec.rk = ctx->enc.rk;
@@ -107,88 +108,52 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
        return 0;
 }
 
-static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
-                            struct scatterlist *dst,
-                            struct scatterlist *src, unsigned int nbytes)
+static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
+                                    const u8 *src, u8 *dst)
 {
-       struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
-       int err;
+       struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       AES_encrypt(src, dst, &ctx->enc);
+}
 
-       while (walk.nbytes) {
-               u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
-               u8 *src = walk.src.virt.addr;
+static int aesbs_cbc_encrypt(struct skcipher_request *req)
+{
+       return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
+}
 
-               if (walk.dst.virt.addr == walk.src.virt.addr) {
-                       u8 *iv = walk.iv;
-
-                       do {
-                               crypto_xor(src, iv, AES_BLOCK_SIZE);
-                               AES_encrypt(src, src, &ctx->enc);
-                               iv = src;
-                               src += AES_BLOCK_SIZE;
-                       } while (--blocks);
-                       memcpy(walk.iv, iv, AES_BLOCK_SIZE);
-               } else {
-                       u8 *dst = walk.dst.virt.addr;
-
-                       do {
-                               crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
-                               AES_encrypt(walk.iv, dst, &ctx->enc);
-                               memcpy(walk.iv, dst, AES_BLOCK_SIZE);
-                               src += AES_BLOCK_SIZE;
-                               dst += AES_BLOCK_SIZE;
-                       } while (--blocks);
-               }
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
-       }
-       return err;
+static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
+                                    const u8 *src, u8 *dst)
+{
+       struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       AES_decrypt(src, dst, &ctx->dec.rk);
 }
 
-static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
-                            struct scatterlist *dst,
-                            struct scatterlist *src, unsigned int nbytes)
+static int aesbs_cbc_decrypt(struct skcipher_request *req)
 {
-       struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
-
-       while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
-               kernel_neon_begin();
-               bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
-                                 walk.nbytes, &ctx->dec, walk.iv);
-               kernel_neon_end();
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
-       }
-       while (walk.nbytes) {
-               u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+       for (err = skcipher_walk_virt(&walk, req, false);
+            (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) {
+               u32 blocks = nbytes / AES_BLOCK_SIZE;
                u8 *dst = walk.dst.virt.addr;
                u8 *src = walk.src.virt.addr;
-               u8 bk[2][AES_BLOCK_SIZE];
                u8 *iv = walk.iv;
 
-               do {
-                       if (walk.dst.virt.addr == walk.src.virt.addr)
-                               memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
-
-                       AES_decrypt(src, dst, &ctx->dec.rk);
-                       crypto_xor(dst, iv, AES_BLOCK_SIZE);
-
-                       if (walk.dst.virt.addr == walk.src.virt.addr)
-                               iv = bk[blocks & 1];
-                       else
-                               iv = src;
+               if (blocks >= 8) {
+                       kernel_neon_begin();
+                       bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv);
+                       kernel_neon_end();
+                       nbytes %= AES_BLOCK_SIZE;
+                       continue;
+               }
 
-                       dst += AES_BLOCK_SIZE;
-                       src += AES_BLOCK_SIZE;
-               } while (--blocks);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               nbytes = crypto_cbc_decrypt_blocks(&walk, tfm,
+                                                  aesbs_decrypt_one);
        }
        return err;
 }
@@ -206,17 +171,15 @@ static void inc_be128_ctr(__be32 ctr[], u32 addend)
        }
 }
 
-static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
-                            struct scatterlist *dst, struct scatterlist *src,
-                            unsigned int nbytes)
+static int aesbs_ctr_encrypt(struct skcipher_request *req)
 {
-       struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        u32 blocks;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+       err = skcipher_walk_virt(&walk, req, false);
 
        while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
                u32 tail = walk.nbytes % AES_BLOCK_SIZE;
@@ -235,11 +198,7 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
                kernel_neon_end();
                inc_be128_ctr(ctr, blocks);
 
-               nbytes -= blocks * AES_BLOCK_SIZE;
-               if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
-                       break;
-
-               err = blkcipher_walk_done(desc, &walk, tail);
+               err = skcipher_walk_done(&walk, tail);
        }
        if (walk.nbytes) {
                u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -248,23 +207,21 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
 
                AES_encrypt(walk.iv, ks, &ctx->enc.rk);
                if (tdst != tsrc)
-                       memcpy(tdst, tsrc, nbytes);
-               crypto_xor(tdst, ks, nbytes);
-               err = blkcipher_walk_done(desc, &walk, 0);
+                       memcpy(tdst, tsrc, walk.nbytes);
+               crypto_xor(tdst, ks, walk.nbytes);
+               err = skcipher_walk_done(&walk, 0);
        }
        return err;
 }
 
-static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
-                            struct scatterlist *dst,
-                            struct scatterlist *src, unsigned int nbytes)
+static int aesbs_xts_encrypt(struct skcipher_request *req)
 {
-       struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+       err = skcipher_walk_virt(&walk, req, false);
 
        /* generate the initial tweak */
        AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -274,21 +231,19 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
                bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
                                  walk.nbytes, &ctx->enc, walk.iv);
                kernel_neon_end();
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        return err;
 }
 
-static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
-                            struct scatterlist *dst,
-                            struct scatterlist *src, unsigned int nbytes)
+static int aesbs_xts_decrypt(struct skcipher_request *req)
 {
-       struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_walk walk;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+       err = skcipher_walk_virt(&walk, req, false);
 
        /* generate the initial tweak */
        AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -298,141 +253,110 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
                bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
                                  walk.nbytes, &ctx->dec, walk.iv);
                kernel_neon_end();
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        return err;
 }
 
-static struct crypto_alg aesbs_algs[] = { {
-       .cra_name               = "__cbc-aes-neonbs",
-       .cra_driver_name        = "__driver-cbc-aes-neonbs",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct aesbs_cbc_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = aesbs_cbc_set_key,
-               .encrypt        = aesbs_cbc_encrypt,
-               .decrypt        = aesbs_cbc_decrypt,
+static struct skcipher_alg aesbs_algs[] = { {
+       .base = {
+               .cra_name               = "__cbc(aes)",
+               .cra_driver_name        = "__cbc-aes-neonbs",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct aesbs_cbc_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .setkey         = aesbs_cbc_set_key,
+       .encrypt        = aesbs_cbc_encrypt,
+       .decrypt        = aesbs_cbc_decrypt,
 }, {
-       .cra_name               = "__ctr-aes-neonbs",
-       .cra_driver_name        = "__driver-ctr-aes-neonbs",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct aesbs_ctr_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = aesbs_ctr_set_key,
-               .encrypt        = aesbs_ctr_encrypt,
-               .decrypt        = aesbs_ctr_encrypt,
+       .base = {
+               .cra_name               = "__ctr(aes)",
+               .cra_driver_name        = "__ctr-aes-neonbs",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = 1,
+               .cra_ctxsize            = sizeof(struct aesbs_ctr_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .chunksize      = AES_BLOCK_SIZE,
+       .setkey         = aesbs_ctr_set_key,
+       .encrypt        = aesbs_ctr_encrypt,
+       .decrypt        = aesbs_ctr_encrypt,
 }, {
-       .cra_name               = "__xts-aes-neonbs",
-       .cra_driver_name        = "__driver-xts-aes-neonbs",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct aesbs_xts_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = aesbs_xts_set_key,
-               .encrypt        = aesbs_xts_encrypt,
-               .decrypt        = aesbs_xts_decrypt,
+       .base = {
+               .cra_name               = "__xts(aes)",
+               .cra_driver_name        = "__xts-aes-neonbs",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct aesbs_xts_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
-}, {
-       .cra_name               = "cbc(aes)",
-       .cra_driver_name        = "cbc-aes-neonbs",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = __ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "ctr(aes)",
-       .cra_driver_name        = "ctr-aes-neonbs",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "xts(aes)",
-       .cra_driver_name        = "xts-aes-neonbs",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
+       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .setkey         = aesbs_xts_set_key,
+       .encrypt        = aesbs_xts_encrypt,
+       .decrypt        = aesbs_xts_decrypt,
 } };
 
+static struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];
+
+static void aesbs_mod_exit(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++)
+               simd_skcipher_free(aesbs_simd_algs[i]);
+
+       crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
 static int __init aesbs_mod_init(void)
 {
+       struct simd_skcipher_alg *simd;
+       const char *basename;
+       const char *algname;
+       const char *drvname;
+       int err;
+       int i;
+
        if (!cpu_has_neon())
                return -ENODEV;
 
-       return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
-}
+       err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+       if (err)
+               return err;
 
-static void __exit aesbs_mod_exit(void)
-{
-       crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+       for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
+               algname = aesbs_algs[i].base.cra_name + 2;
+               drvname = aesbs_algs[i].base.cra_driver_name + 2;
+               basename = aesbs_algs[i].base.cra_driver_name;
+               simd = simd_skcipher_create_compat(algname, drvname, basename);
+               err = PTR_ERR(simd);
+               if (IS_ERR(simd))
+                       goto unregister_simds;
+
+               aesbs_simd_algs[i] = simd;
+       }
+
+       return 0;
+
+unregister_simds:
+       aesbs_mod_exit();
+       return err;
 }
 
 module_init(aesbs_mod_init);
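
The registration loop above strips the "__" prefix from each internal NEON algorithm and wraps it with simd_skcipher_create_compat(), so the plain "cbc(aes)"/"ctr(aes)"/"xts(aes)" names resolve to an async SIMD wrapper that defers to cryptd when NEON is not usable. A minimal illustrative sketch of how a kernel caller would pick up the wrapped cipher follows; the helper name is made up for the example, and which driver a request actually resolves to depends on the priorities of everything registered.

    /*
     * Illustrative sketch, not part of the patch: a request for "xts(aes)"
     * can resolve to the SIMD wrapper created above, while the bare
     * "__xts-aes-neonbs" implementation stays internal-only.
     */
    #include <linux/err.h>
    #include <linux/printk.h>
    #include <crypto/skcipher.h>

    static int try_neonbs_xts(void)
    {
            struct crypto_skcipher *tfm;

            tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            pr_info("xts(aes) backed by %s\n",
                    crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)));

            crypto_free_skcipher(tfm);
            return 0;
    }
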
diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S
new file mode 100644 (file)
index 0000000..e63d400
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *           Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+       .text
+       .align          6
+       .arch           armv8-a
+       .arch_extension crc
+       .fpu            crypto-neon-fp-armv8
+
+.Lcrc32_constants:
+       /*
+        * [(x4*128+32 mod P(x) << 32)]' << 1   = 0x154442bd4
+        * #define CONSTANT_R1  0x154442bd4LL
+        *
+        * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+        * #define CONSTANT_R2  0x1c6e41596LL
+        */
+       .quad           0x0000000154442bd4
+       .quad           0x00000001c6e41596
+
+       /*
+        * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+        * #define CONSTANT_R3  0x1751997d0LL
+        *
+        * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+        * #define CONSTANT_R4  0x0ccaa009eLL
+        */
+       .quad           0x00000001751997d0
+       .quad           0x00000000ccaa009e
+
+       /*
+        * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+        * #define CONSTANT_R5  0x163cd6124LL
+        */
+       .quad           0x0000000163cd6124
+       .quad           0x00000000FFFFFFFF
+
+       /*
+        * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+        *
+        * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
+        *                                                      = 0x1F7011641LL
+        * #define CONSTANT_RU  0x1F7011641LL
+        */
+       .quad           0x00000001DB710641
+       .quad           0x00000001F7011641
+
+.Lcrc32c_constants:
+       .quad           0x00000000740eef02
+       .quad           0x000000009e4addf8
+       .quad           0x00000000f20c0dfe
+       .quad           0x000000014cd00bd6
+       .quad           0x00000000dd45aab8
+       .quad           0x00000000FFFFFFFF
+       .quad           0x0000000105ec76f0
+       .quad           0x00000000dea713f1
+
+       dCONSTANTl      .req    d0
+       dCONSTANTh      .req    d1
+       qCONSTANT       .req    q0
+
+       BUF             .req    r0
+       LEN             .req    r1
+       CRC             .req    r2
+
+       qzr             .req    q9
+
+       /**
+        * Calculate crc32
+        * BUF - buffer
+        * LEN - size of the buffer (multiple of 16 bytes), LEN should be > 63
+        * CRC - initial crc32
+        * return crc32 (in r0)
+        * uint crc32_pmull_le(unsigned char const *buffer,
+        *                     size_t len, uint crc32)
+        */
+ENTRY(crc32_pmull_le)
+       adr             r3, .Lcrc32_constants
+       b               0f
+
+ENTRY(crc32c_pmull_le)
+       adr             r3, .Lcrc32c_constants
+
+0:     bic             LEN, LEN, #15
+       vld1.8          {q1-q2}, [BUF, :128]!
+       vld1.8          {q3-q4}, [BUF, :128]!
+       vmov.i8         qzr, #0
+       vmov.i8         qCONSTANT, #0
+       vmov            dCONSTANTl[0], CRC
+       veor.8          d2, d2, dCONSTANTl
+       sub             LEN, LEN, #0x40
+       cmp             LEN, #0x40
+       blt             less_64
+
+       vld1.64         {qCONSTANT}, [r3]
+
+loop_64:               /* 64 bytes Full cache line folding */
+       sub             LEN, LEN, #0x40
+
+       vmull.p64       q5, d3, dCONSTANTh
+       vmull.p64       q6, d5, dCONSTANTh
+       vmull.p64       q7, d7, dCONSTANTh
+       vmull.p64       q8, d9, dCONSTANTh
+
+       vmull.p64       q1, d2, dCONSTANTl
+       vmull.p64       q2, d4, dCONSTANTl
+       vmull.p64       q3, d6, dCONSTANTl
+       vmull.p64       q4, d8, dCONSTANTl
+
+       veor.8          q1, q1, q5
+       vld1.8          {q5}, [BUF, :128]!
+       veor.8          q2, q2, q6
+       vld1.8          {q6}, [BUF, :128]!
+       veor.8          q3, q3, q7
+       vld1.8          {q7}, [BUF, :128]!
+       veor.8          q4, q4, q8
+       vld1.8          {q8}, [BUF, :128]!
+
+       veor.8          q1, q1, q5
+       veor.8          q2, q2, q6
+       veor.8          q3, q3, q7
+       veor.8          q4, q4, q8
+
+       cmp             LEN, #0x40
+       bge             loop_64
+
+less_64:               /* Folding cache line into 128bit */
+       vldr            dCONSTANTl, [r3, #16]
+       vldr            dCONSTANTh, [r3, #24]
+
+       vmull.p64       q5, d3, dCONSTANTh
+       vmull.p64       q1, d2, dCONSTANTl
+       veor.8          q1, q1, q5
+       veor.8          q1, q1, q2
+
+       vmull.p64       q5, d3, dCONSTANTh
+       vmull.p64       q1, d2, dCONSTANTl
+       veor.8          q1, q1, q5
+       veor.8          q1, q1, q3
+
+       vmull.p64       q5, d3, dCONSTANTh
+       vmull.p64       q1, d2, dCONSTANTl
+       veor.8          q1, q1, q5
+       veor.8          q1, q1, q4
+
+       teq             LEN, #0
+       beq             fold_64
+
+loop_16:               /* Folding rest buffer into 128bit */
+       subs            LEN, LEN, #0x10
+
+       vld1.8          {q2}, [BUF, :128]!
+       vmull.p64       q5, d3, dCONSTANTh
+       vmull.p64       q1, d2, dCONSTANTl
+       veor.8          q1, q1, q5
+       veor.8          q1, q1, q2
+
+       bne             loop_16
+
+fold_64:
+       /* perform the last 64 bit fold, also adds 32 zeroes
+        * to the input stream */
+       vmull.p64       q2, d2, dCONSTANTh
+       vext.8          q1, q1, qzr, #8
+       veor.8          q1, q1, q2
+
+       /* final 32-bit fold */
+       vldr            dCONSTANTl, [r3, #32]
+       vldr            d6, [r3, #40]
+       vmov.i8         d7, #0
+
+       vext.8          q2, q1, qzr, #4
+       vand.8          d2, d2, d6
+       vmull.p64       q1, d2, dCONSTANTl
+       veor.8          q1, q1, q2
+
+       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+       vldr            dCONSTANTl, [r3, #48]
+       vldr            dCONSTANTh, [r3, #56]
+
+       vand.8          q2, q1, q3
+       vext.8          q2, qzr, q2, #8
+       vmull.p64       q2, d5, dCONSTANTh
+       vand.8          q2, q2, q3
+       vmull.p64       q2, d4, dCONSTANTl
+       veor.8          q1, q1, q2
+       vmov            r0, s5
+
+       bx              lr
+ENDPROC(crc32_pmull_le)
+ENDPROC(crc32c_pmull_le)
+
+       .macro          __crc32, c
+       subs            ip, r2, #8
+       bmi             .Ltail\c
+
+       tst             r1, #3
+       bne             .Lunaligned\c
+
+       teq             ip, #0
+.Laligned8\c:
+       ldrd            r2, r3, [r1], #8
+ARM_BE8(rev            r2, r2          )
+ARM_BE8(rev            r3, r3          )
+       crc32\c\()w     r0, r0, r2
+       crc32\c\()w     r0, r0, r3
+       bxeq            lr
+       subs            ip, ip, #8
+       bpl             .Laligned8\c
+
+.Ltail\c:
+       tst             ip, #4
+       beq             2f
+       ldr             r3, [r1], #4
+ARM_BE8(rev            r3, r3          )
+       crc32\c\()w     r0, r0, r3
+
+2:     tst             ip, #2
+       beq             1f
+       ldrh            r3, [r1], #2
+ARM_BE8(rev16          r3, r3          )
+       crc32\c\()h     r0, r0, r3
+
+1:     tst             ip, #1
+       bxeq            lr
+       ldrb            r3, [r1]
+       crc32\c\()b     r0, r0, r3
+       bx              lr
+
+.Lunaligned\c:
+       tst             r1, #1
+       beq             2f
+       ldrb            r3, [r1], #1
+       subs            r2, r2, #1
+       crc32\c\()b     r0, r0, r3
+
+       tst             r1, #2
+       beq             0f
+2:     ldrh            r3, [r1], #2
+       subs            r2, r2, #2
+ARM_BE8(rev16          r3, r3          )
+       crc32\c\()h     r0, r0, r3
+
+0:     subs            ip, r2, #8
+       bpl             .Laligned8\c
+       b               .Ltail\c
+       .endm
+
+       .align          5
+ENTRY(crc32_armv8_le)
+       __crc32
+ENDPROC(crc32_armv8_le)
+
+       .align          5
+ENTRY(crc32c_armv8_le)
+       __crc32         c
+ENDPROC(crc32c_armv8_le)
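
The .Lcrc32_constants and .Lcrc32c_constants tables above hold precomputed remainders of the form x^N mod P(x), bit-reflected and shifted as described in the Intel PCLMULQDQ white paper cited in the header. As a reading aid only, here is a hedged user-space sketch of the core GF(2) step; the reflection and the final "<< 1" applied to the shipped values are deliberately not modelled.

    /*
     * Sketch only: x^n mod P(x) over GF(2) for a degree-32 polynomial P
     * (0x104C11DB7 for CRC-32, 0x11EDC6F41 for CRC-32C).  The constants
     * in the assembly additionally apply the bit reflection and shift
     * conventions of the Intel paper, which this helper does not model.
     */
    #include <stdint.h>

    static uint64_t gf2_xpow_mod(unsigned int n, uint64_t poly)
    {
            uint64_t r = 1;                 /* the polynomial x^0 */

            while (n--) {
                    r <<= 1;                /* multiply by x */
                    if (r & (1ULL << 32))   /* degree reached 32: reduce */
                            r ^= poly;
            }
            return r;                       /* remainder of degree <= 31 */
    }
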
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
new file mode 100644 (file)
index 0000000..e1566be
--- /dev/null
@@ -0,0 +1,242 @@
+/*
+ * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+
+#define PMULL_MIN_LEN          64L     /* minimum size of buffer
+                                        * for crc32_pmull_le() */
+#define SCALE_F                        16L     /* size of NEON register */
+
+asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
+asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);
+
+asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
+asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);
+
+static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len);
+static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len);
+
+static int crc32_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *key = crypto_tfm_ctx(tfm);
+
+       *key = 0;
+       return 0;
+}
+
+static int crc32c_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *key = crypto_tfm_ctx(tfm);
+
+       *key = ~0;
+       return 0;
+}
+
+static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
+                       unsigned int keylen)
+{
+       u32 *mctx = crypto_shash_ctx(hash);
+
+       if (keylen != sizeof(u32)) {
+               crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       *mctx = le32_to_cpup((__le32 *)key);
+       return 0;
+}
+
+static int crc32_init(struct shash_desc *desc)
+{
+       u32 *mctx = crypto_shash_ctx(desc->tfm);
+       u32 *crc = shash_desc_ctx(desc);
+
+       *crc = *mctx;
+       return 0;
+}
+
+static int crc32_update(struct shash_desc *desc, const u8 *data,
+                       unsigned int length)
+{
+       u32 *crc = shash_desc_ctx(desc);
+
+       *crc = crc32_armv8_le(*crc, data, length);
+       return 0;
+}
+
+static int crc32c_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int length)
+{
+       u32 *crc = shash_desc_ctx(desc);
+
+       *crc = crc32c_armv8_le(*crc, data, length);
+       return 0;
+}
+
+static int crc32_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *crc = shash_desc_ctx(desc);
+
+       put_unaligned_le32(*crc, out);
+       return 0;
+}
+
+static int crc32c_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *crc = shash_desc_ctx(desc);
+
+       put_unaligned_le32(~*crc, out);
+       return 0;
+}
+
+static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
+                             unsigned int length)
+{
+       u32 *crc = shash_desc_ctx(desc);
+       unsigned int l;
+
+       if (may_use_simd()) {
+               if ((u32)data % SCALE_F) {
+                       l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
+
+                       *crc = fallback_crc32(*crc, data, l);
+
+                       data += l;
+                       length -= l;
+               }
+
+               if (length >= PMULL_MIN_LEN) {
+                       l = round_down(length, SCALE_F);
+
+                       kernel_neon_begin();
+                       *crc = crc32_pmull_le(data, l, *crc);
+                       kernel_neon_end();
+
+                       data += l;
+                       length -= l;
+               }
+       }
+
+       if (length > 0)
+               *crc = fallback_crc32(*crc, data, length);
+
+       return 0;
+}
+
+static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
+                              unsigned int length)
+{
+       u32 *crc = shash_desc_ctx(desc);
+       unsigned int l;
+
+       if (may_use_simd()) {
+               if ((u32)data % SCALE_F) {
+                       l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
+
+                       *crc = fallback_crc32c(*crc, data, l);
+
+                       data += l;
+                       length -= l;
+               }
+
+               if (length >= PMULL_MIN_LEN) {
+                       l = round_down(length, SCALE_F);
+
+                       kernel_neon_begin();
+                       *crc = crc32c_pmull_le(data, l, *crc);
+                       kernel_neon_end();
+
+                       data += l;
+                       length -= l;
+               }
+       }
+
+       if (length > 0)
+               *crc = fallback_crc32c(*crc, data, length);
+
+       return 0;
+}
+
+static struct shash_alg crc32_pmull_algs[] = { {
+       .setkey                 = crc32_setkey,
+       .init                   = crc32_init,
+       .update                 = crc32_update,
+       .final                  = crc32_final,
+       .descsize               = sizeof(u32),
+       .digestsize             = sizeof(u32),
+
+       .base.cra_ctxsize       = sizeof(u32),
+       .base.cra_init          = crc32_cra_init,
+       .base.cra_name          = "crc32",
+       .base.cra_driver_name   = "crc32-arm-ce",
+       .base.cra_priority      = 200,
+       .base.cra_blocksize     = 1,
+       .base.cra_module        = THIS_MODULE,
+}, {
+       .setkey                 = crc32_setkey,
+       .init                   = crc32_init,
+       .update                 = crc32c_update,
+       .final                  = crc32c_final,
+       .descsize               = sizeof(u32),
+       .digestsize             = sizeof(u32),
+
+       .base.cra_ctxsize       = sizeof(u32),
+       .base.cra_init          = crc32c_cra_init,
+       .base.cra_name          = "crc32c",
+       .base.cra_driver_name   = "crc32c-arm-ce",
+       .base.cra_priority      = 200,
+       .base.cra_blocksize     = 1,
+       .base.cra_module        = THIS_MODULE,
+} };
+
+static int __init crc32_pmull_mod_init(void)
+{
+       if (elf_hwcap2 & HWCAP2_PMULL) {
+               crc32_pmull_algs[0].update = crc32_pmull_update;
+               crc32_pmull_algs[1].update = crc32c_pmull_update;
+
+               if (elf_hwcap2 & HWCAP2_CRC32) {
+                       fallback_crc32 = crc32_armv8_le;
+                       fallback_crc32c = crc32c_armv8_le;
+               } else {
+                       fallback_crc32 = crc32_le;
+                       fallback_crc32c = __crc32c_le;
+               }
+       } else if (!(elf_hwcap2 & HWCAP2_CRC32)) {
+               return -ENODEV;
+       }
+
+       return crypto_register_shashes(crc32_pmull_algs,
+                                      ARRAY_SIZE(crc32_pmull_algs));
+}
+
+static void __exit crc32_pmull_mod_exit(void)
+{
+       crypto_unregister_shashes(crc32_pmull_algs,
+                                 ARRAY_SIZE(crc32_pmull_algs));
+}
+
+module_init(crc32_pmull_mod_init);
+module_exit(crc32_pmull_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crc32");
+MODULE_ALIAS_CRYPTO("crc32c");
diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S
new file mode 100644 (file)
index 0000000..ce45ba0
--- /dev/null
@@ -0,0 +1,427 @@
+//
+// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
+//
+// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+
+//
+// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+//
+// Copyright (c) 2013, Intel Corporation
+//
+// Authors:
+//     Erdinc Ozturk <erdinc.ozturk@intel.com>
+//     Vinodh Gopal <vinodh.gopal@intel.com>
+//     James Guilford <james.guilford@intel.com>
+//     Tim Chen <tim.c.chen@linux.intel.com>
+//
+// This software is available to you under a choice of one of two
+// licenses.  You may choose to be licensed under the terms of the GNU
+// General Public License (GPL) Version 2, available from the file
+// COPYING in the main directory of this source tree, or the
+// OpenIB.org BSD license below:
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the
+//   distribution.
+//
+// * Neither the name of the Intel Corporation nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//       Function API:
+//       UINT16 crc_t10dif_pmull(
+//               UINT16 init_crc, //initial CRC value, 16 bits
+//               const unsigned char *buf, //buffer pointer to calculate CRC on
+//               UINT32 len //buffer length in bytes
+//       );
+//
+//       Reference paper titled "Fast CRC Computation for Generic
+//     Polynomials Using PCLMULQDQ Instruction"
+//       URL: http://www.intel.com/content/dam/www/public/us/en/documents
+//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+//
+//
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define CPU_LE(code...)
+#else
+#define CPU_LE(code...)                code
+#endif
+
+       .text
+       .fpu            crypto-neon-fp-armv8
+
+       arg1_low32      .req    r0
+       arg2            .req    r1
+       arg3            .req    r2
+
+       qzr             .req    q13
+
+       q0l             .req    d0
+       q0h             .req    d1
+       q1l             .req    d2
+       q1h             .req    d3
+       q2l             .req    d4
+       q2h             .req    d5
+       q3l             .req    d6
+       q3h             .req    d7
+       q4l             .req    d8
+       q4h             .req    d9
+       q5l             .req    d10
+       q5h             .req    d11
+       q6l             .req    d12
+       q6h             .req    d13
+       q7l             .req    d14
+       q7h             .req    d15
+
+ENTRY(crc_t10dif_pmull)
+       vmov.i8         qzr, #0                 // init zero register
+
+       // adjust the 16-bit initial_crc value, scale it to 32 bits
+       lsl             arg1_low32, arg1_low32, #16
+
+       // check if smaller than 256
+       cmp             arg3, #256
+
+       // for sizes less than 256, we can't fold 128 bytes at a time...
+       blt             _less_than_128
+
+       // load the initial crc value
+       // crc value does not need to be byte-reflected, but it needs
+       // to be moved to the high part of the register.
+       // because data will be byte-reflected and will align with
+       // initial crc at correct place.
+       vmov            s0, arg1_low32          // initial crc
+       vext.8          q10, qzr, q0, #4
+
+       // receive the initial 128 bytes of data, xor the initial crc value
+       vld1.64         {q0-q1}, [arg2, :128]!
+       vld1.64         {q2-q3}, [arg2, :128]!
+       vld1.64         {q4-q5}, [arg2, :128]!
+       vld1.64         {q6-q7}, [arg2, :128]!
+CPU_LE(        vrev64.8        q0, q0                  )
+CPU_LE(        vrev64.8        q1, q1                  )
+CPU_LE(        vrev64.8        q2, q2                  )
+CPU_LE(        vrev64.8        q3, q3                  )
+CPU_LE(        vrev64.8        q4, q4                  )
+CPU_LE(        vrev64.8        q5, q5                  )
+CPU_LE(        vrev64.8        q6, q6                  )
+CPU_LE(        vrev64.8        q7, q7                  )
+
+       vswp            d0, d1
+       vswp            d2, d3
+       vswp            d4, d5
+       vswp            d6, d7
+       vswp            d8, d9
+       vswp            d10, d11
+       vswp            d12, d13
+       vswp            d14, d15
+
+       // XOR the initial_crc value
+       veor.8          q0, q0, q10
+
+       adr             ip, rk3
+       vld1.64         {q10}, [ip, :128]       // q10 has rk3 and rk4
+
+       //
+       // we subtract 256 instead of 128 to save one instruction from the loop
+       //
+       sub             arg3, arg3, #256
+
+       // at this section of the code, there is 128*x+y (0<=y<128) bytes of
+       // buffer. The _fold_64_B_loop will fold 128 bytes at a time
+       // until we have 128+y bytes of buffer
+
+
+       // fold 128 bytes at a time. This section of the code folds 8 vector
+       // registers in parallel
+_fold_64_B_loop:
+
+       .macro          fold64, reg1, reg2
+       vld1.64         {q11-q12}, [arg2, :128]!
+
+       vmull.p64       q8, \reg1\()h, d21
+       vmull.p64       \reg1, \reg1\()l, d20
+       vmull.p64       q9, \reg2\()h, d21
+       vmull.p64       \reg2, \reg2\()l, d20
+
+CPU_LE(        vrev64.8        q11, q11                )
+CPU_LE(        vrev64.8        q12, q12                )
+       vswp            d22, d23
+       vswp            d24, d25
+
+       veor.8          \reg1, \reg1, q8
+       veor.8          \reg2, \reg2, q9
+       veor.8          \reg1, \reg1, q11
+       veor.8          \reg2, \reg2, q12
+       .endm
+
+       fold64          q0, q1
+       fold64          q2, q3
+       fold64          q4, q5
+       fold64          q6, q7
+
+       subs            arg3, arg3, #128
+
+       // check if there is another 64B in the buffer to be able to fold
+       bge             _fold_64_B_loop
+
+       // at this point, the buffer pointer is pointing at the last y bytes
+       // of the buffer, and the 128 bytes of folded data are in 8 of the
+       // vector registers: q0 - q7
+
+       // fold the 8 vector registers to 1 vector register with different
+       // constants
+
+       adr             ip, rk9
+       vld1.64         {q10}, [ip, :128]!
+
+       .macro          fold16, reg, rk
+       vmull.p64       q8, \reg\()l, d20
+       vmull.p64       \reg, \reg\()h, d21
+       .ifnb           \rk
+       vld1.64         {q10}, [ip, :128]!
+       .endif
+       veor.8          q7, q7, q8
+       veor.8          q7, q7, \reg
+       .endm
+
+       fold16          q0, rk11
+       fold16          q1, rk13
+       fold16          q2, rk15
+       fold16          q3, rk17
+       fold16          q4, rk19
+       fold16          q5, rk1
+       fold16          q6
+
+       // instead of 128, we add 112 (128 - 16) to the loop counter to save
+       // one instruction from the loop; instead of a cmp instruction, we use
+       // the negative flag with the blt instruction
+       adds            arg3, arg3, #(128-16)
+       blt             _final_reduction_for_128
+
+       // now we have 16+y bytes left to reduce. 16 bytes are in register q7
+       // and the rest is in memory. We can fold 16 bytes at a time if y>=16
+       // continue folding 16B at a time
+
+_16B_reduction_loop:
+       vmull.p64       q8, d14, d20
+       vmull.p64       q7, d15, d21
+       veor.8          q7, q7, q8
+
+       vld1.64         {q0}, [arg2, :128]!
+CPU_LE(        vrev64.8        q0, q0          )
+       vswp            d0, d1
+       veor.8          q7, q7, q0
+       subs            arg3, arg3, #16
+
+       // instead of a cmp instruction, we utilize the flags with the
+       // bge instruction, equivalent of: cmp arg3, 16-16
+       // check if there is any more 16B in the buffer to be able to fold
+       bge             _16B_reduction_loop
+
+       // now we have 16+z bytes left to reduce, where 0 <= z < 16.
+       // first, we reduce the data in the q7 register
+
+_final_reduction_for_128:
+       // check if any more data to fold. If not, compute the CRC of
+       // the final 128 bits
+       adds            arg3, arg3, #16
+       beq             _128_done
+
+       // here we are getting data that is less than 16 bytes.
+       // since we know that there was data before the pointer, we can
+       // offset the input pointer before the actual point, to receive
+       // exactly 16 bytes. after that the registers need to be adjusted.
+_get_last_two_regs:
+       add             arg2, arg2, arg3
+       sub             arg2, arg2, #16
+       vld1.64         {q1}, [arg2]
+CPU_LE(        vrev64.8        q1, q1                  )
+       vswp            d2, d3
+
+       // get rid of the extra data that was loaded before
+       // load the shift constant
+       adr             ip, tbl_shf_table + 16
+       sub             ip, ip, arg3
+       vld1.8          {q0}, [ip]
+
+       // shift q7 to the left by arg3 bytes, result in q2
+       vtbl.8          d4, {d14-d15}, d0
+       vtbl.8          d5, {d14-d15}, d1
+
+       // shift q7 to the right by 16-arg3 bytes, result in q9
+       vmov.i8         q9, #0x80
+       veor.8          q0, q0, q9
+       vtbl.8          d18, {d14-d15}, d0
+       vtbl.8          d19, {d14-d15}, d1
+
+       // blend
+       vshr.s8         q0, q0, #7              // convert to 8-bit mask
+       vbsl.8          q0, q2, q1
+
+       // fold 16 Bytes
+       vmull.p64       q8, d18, d20
+       vmull.p64       q7, d19, d21
+       veor.8          q7, q7, q8
+       veor.8          q7, q7, q0
+
+_128_done:
+       // compute crc of a 128-bit value
+       vldr            d20, rk5
+       vldr            d21, rk6                // rk5 and rk6 in q10
+
+       // 64b fold
+       vext.8          q0, qzr, q7, #8
+       vmull.p64       q7, d15, d20
+       veor.8          q7, q7, q0
+
+       // 32b fold
+       vext.8          q0, q7, qzr, #12
+       vmov            s31, s3
+       vmull.p64       q0, d0, d21
+       veor.8          q7, q0, q7
+
+       // barrett reduction
+_barrett:
+       vldr            d20, rk7
+       vldr            d21, rk8
+
+       vmull.p64       q0, d15, d20
+       vext.8          q0, qzr, q0, #12
+       vmull.p64       q0, d1, d21
+       vext.8          q0, qzr, q0, #12
+       veor.8          q7, q7, q0
+       vmov            r0, s29
+
+_cleanup:
+       // scale the result back to 16 bits
+       lsr             r0, r0, #16
+       bx              lr
+
+_less_than_128:
+       teq             arg3, #0
+       beq             _cleanup
+
+       vmov.i8         q0, #0
+       vmov            s3, arg1_low32          // get the initial crc value
+
+       vld1.64         {q7}, [arg2, :128]!
+CPU_LE(        vrev64.8        q7, q7          )
+       vswp            d14, d15
+       veor.8          q7, q7, q0
+
+       cmp             arg3, #16
+       beq             _128_done               // exactly 16 left
+       blt             _less_than_16_left
+
+       // now if there is, load the constants
+       vldr            d20, rk1
+       vldr            d21, rk2                // rk1 and rk2 in q10
+
+       // check if there is enough buffer to be able to fold 16B at a time
+       subs            arg3, arg3, #32
+       addlt           arg3, arg3, #16
+       blt             _get_last_two_regs
+       b               _16B_reduction_loop
+
+_less_than_16_left:
+       // move the valid bytes of q7 into position using the shuffle table
+       adr             ip, tbl_shf_table + 16
+       sub             ip, ip, arg3
+       vld1.8          {q0}, [ip]
+       vmov.i8         q9, #0x80
+       veor.8          q0, q0, q9
+       vtbl.8          d18, {d14-d15}, d0
+       vtbl.8          d15, {d14-d15}, d1
+       vmov            d14, d18
+       b               _128_done
+ENDPROC(crc_t10dif_pmull)
+
+// precomputed constants
+// these constants are precomputed from the poly:
+// 0x8bb70000 (0x8bb7 scaled to 32 bits)
+       .align          4
+// Q = 0x18BB70000
+// rk1 = 2^(32*3) mod Q << 32
+// rk2 = 2^(32*5) mod Q << 32
+// rk3 = 2^(32*15) mod Q << 32
+// rk4 = 2^(32*17) mod Q << 32
+// rk5 = 2^(32*3) mod Q << 32
+// rk6 = 2^(32*2) mod Q << 32
+// rk7 = floor(2^64/Q)
+// rk8 = Q
+
+rk3:   .quad           0x9d9d000000000000
+rk4:   .quad           0x7cf5000000000000
+rk5:   .quad           0x2d56000000000000
+rk6:   .quad           0x1368000000000000
+rk7:   .quad           0x00000001f65a57f8
+rk8:   .quad           0x000000018bb70000
+rk9:   .quad           0xceae000000000000
+rk10:  .quad           0xbfd6000000000000
+rk11:  .quad           0x1e16000000000000
+rk12:  .quad           0x713c000000000000
+rk13:  .quad           0xf7f9000000000000
+rk14:  .quad           0x80a6000000000000
+rk15:  .quad           0x044c000000000000
+rk16:  .quad           0xe658000000000000
+rk17:  .quad           0xad18000000000000
+rk18:  .quad           0xa497000000000000
+rk19:  .quad           0x6ee3000000000000
+rk20:  .quad           0xe7b5000000000000
+rk1:   .quad           0x2d56000000000000
+rk2:   .quad           0x06df000000000000
+
+tbl_shf_table:
+// use these values for shift constants for the tbl/tbx instruction
+// different alignments result in values as shown:
+//     DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+//     DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
+//     DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
+//     DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+//     DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+//     DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+//     DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
+//     DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
+//     DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
+//     DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
+//     DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
+//     DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
+//     DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
+//     DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
+//     DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
+
+       .byte            0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+       .byte           0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+       .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+       .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0x0
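
tbl_shf_table above is consumed with vtbl: index values 0x0-0xf select a source byte, while the 0x8x values fall outside the 16-byte table and yield zero, and XORing the index vector with 0x80 flips which lanes do which. A small hedged C model of that lookup, offered purely as a reading aid for the shift constants:

    /*
     * C model of the two-register NEON vtbl lookup used with
     * tbl_shf_table: in-range indices (< 16) pick a source byte,
     * out-of-range indices produce zero.
     */
    #include <stdint.h>

    static void vtbl16_model(uint8_t out[16], const uint8_t src[16],
                             const uint8_t idx[16])
    {
            for (int i = 0; i < 16; i++)
                    out[i] = (idx[i] < 16) ? src[idx[i]] : 0;
    }
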
diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c
new file mode 100644 (file)
index 0000000..d428355
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc-t10dif.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+#define CRC_T10DIF_PMULL_CHUNK_SIZE    16U
+
+asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u32 len);
+
+static int crct10dif_init(struct shash_desc *desc)
+{
+       u16 *crc = shash_desc_ctx(desc);
+
+       *crc = 0;
+       return 0;
+}
+
+static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+                           unsigned int length)
+{
+       u16 *crc = shash_desc_ctx(desc);
+       unsigned int l;
+
+       if (!may_use_simd()) {
+               *crc = crc_t10dif_generic(*crc, data, length);
+       } else {
+               if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
+                       l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
+                                 ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+
+                       *crc = crc_t10dif_generic(*crc, data, l);
+
+                       length -= l;
+                       data += l;
+               }
+               if (length > 0) {
+                       kernel_neon_begin();
+                       *crc = crc_t10dif_pmull(*crc, data, length);
+                       kernel_neon_end();
+               }
+       }
+       return 0;
+}
+
+static int crct10dif_final(struct shash_desc *desc, u8 *out)
+{
+       u16 *crc = shash_desc_ctx(desc);
+
+       *(u16 *)out = *crc;
+       return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+       .digestsize             = CRC_T10DIF_DIGEST_SIZE,
+       .init                   = crct10dif_init,
+       .update                 = crct10dif_update,
+       .final                  = crct10dif_final,
+       .descsize               = CRC_T10DIF_DIGEST_SIZE,
+
+       .base.cra_name          = "crct10dif",
+       .base.cra_driver_name   = "crct10dif-arm-ce",
+       .base.cra_priority      = 200,
+       .base.cra_blocksize     = CRC_T10DIF_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+       if (!(elf_hwcap2 & HWCAP2_PMULL))
+               return -ENODEV;
+
+       return crypto_register_shash(&crc_t10dif_alg);
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+       crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_init(crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");
index 602e2c2..93ec8fe 100644 (file)
                                clocks = <&cpm_syscon0 1 21>;
                                status = "disabled";
                        };
+
+                       cpm_trng: trng@760000 {
+                               compatible = "marvell,armada-8k-rng", "inside-secure,safexcel-eip76";
+                               reg = <0x760000 0x7d>;
+                               interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+                               clocks = <&cpm_syscon0 1 25>;
+                               status = "okay";
+                       };
                };
 
                cpm_pcie0: pcie@f2600000 {
index 6bf9e24..ee8db05 100644 (file)
                                clocks = <&cps_syscon0 1 21>;
                                status = "disabled";
                        };
+
+                       cps_trng: trng@760000 {
+                               compatible = "marvell,armada-8k-rng", "inside-secure,safexcel-eip76";
+                               reg = <0x760000 0x7d>;
+                               interrupts = <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>;
+                               clocks = <&cps_syscon0 1 25>;
+                               status = "okay";
+                       };
                };
 
                cps_pcie0: pcie@f4600000 {
diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore
new file mode 100644 (file)
index 0000000..879df87
--- /dev/null
@@ -0,0 +1,2 @@
+sha256-core.S
+sha512-core.S
index 2cf32e9..450a85d 100644 (file)
@@ -8,6 +8,14 @@ menuconfig ARM64_CRYPTO
 
 if ARM64_CRYPTO
 
+config CRYPTO_SHA256_ARM64
+       tristate "SHA-224/SHA-256 digest algorithm for arm64"
+       select CRYPTO_HASH
+
+config CRYPTO_SHA512_ARM64
+       tristate "SHA-384/SHA-512 digest algorithm for arm64"
+       select CRYPTO_HASH
+
 config CRYPTO_SHA1_ARM64_CE
        tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
        depends on ARM64 && KERNEL_MODE_NEON
@@ -23,6 +31,16 @@ config CRYPTO_GHASH_ARM64_CE
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_HASH
 
+config CRYPTO_CRCT10DIF_ARM64_CE
+       tristate "CRCT10DIF digest algorithm using PMULL instructions"
+       depends on KERNEL_MODE_NEON && CRC_T10DIF
+       select CRYPTO_HASH
+
+config CRYPTO_CRC32_ARM64_CE
+       tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
+       depends on KERNEL_MODE_NEON && CRC32
+       select CRYPTO_HASH
+
 config CRYPTO_AES_ARM64_CE
        tristate "AES core cipher using ARMv8 Crypto Extensions"
        depends on ARM64 && KERNEL_MODE_NEON
@@ -40,17 +58,18 @@ config CRYPTO_AES_ARM64_CE_BLK
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_BLKCIPHER
        select CRYPTO_AES_ARM64_CE
-       select CRYPTO_ABLK_HELPER
+       select CRYPTO_SIMD
 
 config CRYPTO_AES_ARM64_NEON_BLK
        tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
        depends on ARM64 && KERNEL_MODE_NEON
        select CRYPTO_BLKCIPHER
        select CRYPTO_AES
-       select CRYPTO_ABLK_HELPER
+       select CRYPTO_SIMD
 
 config CRYPTO_CRC32_ARM64
        tristate "CRC32 and CRC32C using optional ARMv8 instructions"
        depends on ARM64
        select CRYPTO_HASH
+
 endif
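
A possible configuration fragment enabling the new arm64 entries (illustrative only; built-in versus module is the builder's choice, and the CE options additionally depend on KERNEL_MODE_NEON plus CRC_T10DIF/CRC32 as stated in their depends clauses above):

    CONFIG_ARM64_CRYPTO=y
    CONFIG_CRYPTO_SHA256_ARM64=m
    CONFIG_CRYPTO_SHA512_ARM64=m
    CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
    CONFIG_CRYPTO_CRC32_ARM64_CE=m
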
index abb79b3..aa8888d 100644 (file)
@@ -17,6 +17,12 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
+obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
+crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
+
+obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
+crc32-ce-y := crc32-ce-core.o crc32-ce-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
 
@@ -29,6 +35,12 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
 obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
 aes-neon-blk-y := aes-glue-neon.o aes-neon.o
 
+obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o
+sha256-arm64-y := sha256-glue.o sha256-core.o
+
+obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
+sha512-arm64-y := sha512-glue.o sha512-core.o
+
 AFLAGS_aes-ce.o                := -DINTERLEAVE=4
 AFLAGS_aes-neon.o      := -DINTERLEAVE=4
 
@@ -40,3 +52,14 @@ CFLAGS_crc32-arm64.o := -mcpu=generic+crc
 
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
        $(call if_changed_rule,cc_o_c)
+
+quiet_cmd_perlasm = PERLASM $@
+      cmd_perlasm = $(PERL) $(<) void $(@)
+
+$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
+       $(call cmd,perlasm)
+
+$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
+       $(call cmd,perlasm)
+
+.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S
index a2a7fbc..3363560 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
        .text
        .arch   armv8-a+crypto
@@ -19,7 +20,7 @@
         */
 ENTRY(ce_aes_ccm_auth_data)
        ldr     w8, [x3]                        /* leftover from prev round? */
-       ld1     {v0.2d}, [x0]                   /* load mac */
+       ld1     {v0.16b}, [x0]                  /* load mac */
        cbz     w8, 1f
        sub     w8, w8, #16
        eor     v1.16b, v1.16b, v1.16b
@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
        beq     8f                              /* out of input? */
        cbnz    w8, 0b
        eor     v0.16b, v0.16b, v1.16b
-1:     ld1     {v3.2d}, [x4]                   /* load first round key */
+1:     ld1     {v3.16b}, [x4]                  /* load first round key */
        prfm    pldl1strm, [x1]
        cmp     w5, #12                         /* which key size? */
        add     x6, x4, #16
@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
        mov     v5.16b, v3.16b
        b       4f
 2:     mov     v4.16b, v3.16b
-       ld1     {v5.2d}, [x6], #16              /* load 2nd round key */
+       ld1     {v5.16b}, [x6], #16             /* load 2nd round key */
 3:     aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
-4:     ld1     {v3.2d}, [x6], #16              /* load next round key */
+4:     ld1     {v3.16b}, [x6], #16             /* load next round key */
        aese    v0.16b, v5.16b
        aesmc   v0.16b, v0.16b
-5:     ld1     {v4.2d}, [x6], #16              /* load next round key */
+5:     ld1     {v4.16b}, [x6], #16             /* load next round key */
        subs    w7, w7, #3
        aese    v0.16b, v3.16b
        aesmc   v0.16b, v0.16b
-       ld1     {v5.2d}, [x6], #16              /* load next round key */
+       ld1     {v5.16b}, [x6], #16             /* load next round key */
        bpl     3b
        aese    v0.16b, v4.16b
        subs    w2, w2, #16                     /* last data? */
@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
        ld1     {v1.16b}, [x1], #16             /* load next input block */
        eor     v0.16b, v0.16b, v1.16b          /* xor with mac */
        bne     1b
-6:     st1     {v0.2d}, [x0]                   /* store mac */
+6:     st1     {v0.16b}, [x0]                  /* store mac */
        beq     10f
        adds    w2, w2, #16
        beq     10f
@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
        adds    w7, w7, #1
        bne     9b
        eor     v0.16b, v0.16b, v1.16b
-       st1     {v0.2d}, [x0]
+       st1     {v0.16b}, [x0]
 10:    str     w8, [x3]
        ret
 ENDPROC(ce_aes_ccm_auth_data)
@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
         *                       u32 rounds);
         */
 ENTRY(ce_aes_ccm_final)
-       ld1     {v3.2d}, [x2], #16              /* load first round key */
-       ld1     {v0.2d}, [x0]                   /* load mac */
+       ld1     {v3.16b}, [x2], #16             /* load first round key */
+       ld1     {v0.16b}, [x0]                  /* load mac */
        cmp     w3, #12                         /* which key size? */
        sub     w3, w3, #2                      /* modified # of rounds */
-       ld1     {v1.2d}, [x1]                   /* load 1st ctriv */
+       ld1     {v1.16b}, [x1]                  /* load 1st ctriv */
        bmi     0f
        bne     3f
        mov     v5.16b, v3.16b
        b       2f
 0:     mov     v4.16b, v3.16b
-1:     ld1     {v5.2d}, [x2], #16              /* load next round key */
+1:     ld1     {v5.16b}, [x2], #16             /* load next round key */
        aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b
-2:     ld1     {v3.2d}, [x2], #16              /* load next round key */
+2:     ld1     {v3.16b}, [x2], #16             /* load next round key */
        aese    v0.16b, v5.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v5.16b
        aesmc   v1.16b, v1.16b
-3:     ld1     {v4.2d}, [x2], #16              /* load next round key */
+3:     ld1     {v4.16b}, [x2], #16             /* load next round key */
        subs    w3, w3, #3
        aese    v0.16b, v3.16b
        aesmc   v0.16b, v0.16b
@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
        aese    v1.16b, v4.16b
        /* final round key cancels out */
        eor     v0.16b, v0.16b, v1.16b          /* en-/decrypt the mac */
-       st1     {v0.2d}, [x0]                   /* store result */
+       st1     {v0.16b}, [x0]                  /* store result */
        ret
 ENDPROC(ce_aes_ccm_final)
 
        .macro  aes_ccm_do_crypt,enc
        ldr     x8, [x6, #8]                    /* load lower ctr */
-       ld1     {v0.2d}, [x5]                   /* load mac */
-       rev     x8, x8                          /* keep swabbed ctr in reg */
+       ld1     {v0.16b}, [x5]                  /* load mac */
+CPU_LE(        rev     x8, x8                  )       /* keep swabbed ctr in reg */
 0:     /* outer loop */
-       ld1     {v1.1d}, [x6]                   /* load upper ctr */
+       ld1     {v1.8b}, [x6]                   /* load upper ctr */
        prfm    pldl1strm, [x1]
        add     x8, x8, #1
        rev     x9, x8
        cmp     w4, #12                         /* which key size? */
        sub     w7, w4, #2                      /* get modified # of rounds */
        ins     v1.d[1], x9                     /* no carry in lower ctr */
-       ld1     {v3.2d}, [x3]                   /* load first round key */
+       ld1     {v3.16b}, [x3]                  /* load first round key */
        add     x10, x3, #16
        bmi     1f
        bne     4f
        mov     v5.16b, v3.16b
        b       3f
 1:     mov     v4.16b, v3.16b
-       ld1     {v5.2d}, [x10], #16             /* load 2nd round key */
+       ld1     {v5.16b}, [x10], #16            /* load 2nd round key */
 2:     /* inner loop: 3 rounds, 2x interleaved */
        aese    v0.16b, v4.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v4.16b
        aesmc   v1.16b, v1.16b
-3:     ld1     {v3.2d}, [x10], #16             /* load next round key */
+3:     ld1     {v3.16b}, [x10], #16            /* load next round key */
        aese    v0.16b, v5.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v5.16b
        aesmc   v1.16b, v1.16b
-4:     ld1     {v4.2d}, [x10], #16             /* load next round key */
+4:     ld1     {v4.16b}, [x10], #16            /* load next round key */
        subs    w7, w7, #3
        aese    v0.16b, v3.16b
        aesmc   v0.16b, v0.16b
        aese    v1.16b, v3.16b
        aesmc   v1.16b, v1.16b
-       ld1     {v5.2d}, [x10], #16             /* load next round key */
+       ld1     {v5.16b}, [x10], #16            /* load next round key */
        bpl     2b
        aese    v0.16b, v4.16b
        aese    v1.16b, v4.16b
@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
        eor     v0.16b, v0.16b, v2.16b          /* xor mac with pt ^ rk[last] */
        st1     {v1.16b}, [x0], #16             /* write output block */
        bne     0b
-       rev     x8, x8
-       st1     {v0.2d}, [x5]                   /* store mac */
+CPU_LE(        rev     x8, x8                  )
+       st1     {v0.16b}, [x5]                  /* store mac */
        str     x8, [x6, #8]                    /* store lsb end of ctr (BE) */
 5:     ret
 
 6:     eor     v0.16b, v0.16b, v5.16b          /* final round mac */
        eor     v1.16b, v1.16b, v5.16b          /* final round enc */
-       st1     {v0.2d}, [x5]                   /* store mac */
+       st1     {v0.16b}, [x5]                  /* store mac */
        add     w2, w2, #16                     /* process partial tail block */
 7:     ldrb    w9, [x1], #1                    /* get 1 byte of input */
        umov    w6, v1.b[0]                     /* get top crypted ctr byte */
index f4bf2f2..cc5515d 100644 (file)
@@ -11,9 +11,9 @@
 #include <asm/neon.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
 #include "aes-ce-setkey.h"
@@ -149,12 +149,7 @@ static int ccm_encrypt(struct aead_request *req)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
-       struct blkcipher_desc desc = { .info = req->iv };
-       struct blkcipher_walk walk;
-       struct scatterlist srcbuf[2];
-       struct scatterlist dstbuf[2];
-       struct scatterlist *src;
-       struct scatterlist *dst;
+       struct skcipher_walk walk;
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen;
@@ -172,27 +167,19 @@ static int ccm_encrypt(struct aead_request *req)
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-       src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
-       dst = src;
-       if (req->src != req->dst)
-               dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
-
-       blkcipher_walk_init(&walk, dst, src, len);
-       err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
-                                            AES_BLOCK_SIZE);
+       err = skcipher_walk_aead_encrypt(&walk, req, true);
 
        while (walk.nbytes) {
                u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               if (walk.nbytes == len)
+               if (walk.nbytes == walk.total)
                        tail = 0;
 
                ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   walk.nbytes - tail, ctx->key_enc,
                                   num_rounds(ctx), mac, walk.iv);
 
-               len -= walk.nbytes - tail;
-               err = blkcipher_walk_done(&desc, &walk, tail);
+               err = skcipher_walk_done(&walk, tail);
        }
        if (!err)
                ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
@@ -203,7 +190,7 @@ static int ccm_encrypt(struct aead_request *req)
                return err;
 
        /* copy authtag to end of dst */
-       scatterwalk_map_and_copy(mac, dst, req->cryptlen,
+       scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
                                 crypto_aead_authsize(aead), 1);
 
        return 0;
@@ -214,12 +201,7 @@ static int ccm_decrypt(struct aead_request *req)
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
        unsigned int authsize = crypto_aead_authsize(aead);
-       struct blkcipher_desc desc = { .info = req->iv };
-       struct blkcipher_walk walk;
-       struct scatterlist srcbuf[2];
-       struct scatterlist dstbuf[2];
-       struct scatterlist *src;
-       struct scatterlist *dst;
+       struct skcipher_walk walk;
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen - authsize;
@@ -237,27 +219,19 @@ static int ccm_decrypt(struct aead_request *req)
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-       src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
-       dst = src;
-       if (req->src != req->dst)
-               dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
-
-       blkcipher_walk_init(&walk, dst, src, len);
-       err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
-                                            AES_BLOCK_SIZE);
+       err = skcipher_walk_aead_decrypt(&walk, req, true);
 
        while (walk.nbytes) {
                u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-               if (walk.nbytes == len)
+               if (walk.nbytes == walk.total)
                        tail = 0;
 
                ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                   walk.nbytes - tail, ctx->key_enc,
                                   num_rounds(ctx), mac, walk.iv);
 
-               len -= walk.nbytes - tail;
-               err = blkcipher_walk_done(&desc, &walk, tail);
+               err = skcipher_walk_done(&walk, tail);
        }
        if (!err)
                ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
@@ -268,7 +242,8 @@ static int ccm_decrypt(struct aead_request *req)
                return err;
 
        /* compare calculated auth tag with the stored one */
-       scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize,
+       scatterwalk_map_and_copy(buf, req->src,
+                                req->assoclen + req->cryptlen - authsize,
                                 authsize, 0);
 
        if (crypto_memneq(mac, buf, authsize))
@@ -287,6 +262,7 @@ static struct aead_alg ccm_aes_alg = {
                .cra_module             = THIS_MODULE,
        },
        .ivsize         = AES_BLOCK_SIZE,
+       .chunksize      = AES_BLOCK_SIZE,
        .maxauthsize    = AES_BLOCK_SIZE,
        .setkey         = ccm_setkey,
        .setauthsize    = ccm_setauthsize,
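
For reference, the offsets used above (req->assoclen + req->cryptlen) follow from the AEAD request layout: the src/dst scatterlists cover the associated data first, and the tag sits directly after the ciphertext. A hedged caller-side sketch of that layout; the function and buffer names are illustrative, a synchronous tfm is assumed, and setkey()/setauthsize() are taken as already done.

    /*
     * Illustrative sketch of an AEAD request for "ccm(aes)": buf holds
     * AD || plaintext and leaves room for the tag, which is why the glue
     * code above copies the tag at assoclen + cryptlen.  Assumes a
     * synchronous tfm and prior setkey()/setauthsize().
     */
    #include <crypto/aead.h>
    #include <linux/scatterlist.h>

    static int ccm_seal_sketch(struct crypto_aead *tfm, u8 *buf,
                               unsigned int assoclen, unsigned int ptlen,
                               u8 *iv /* 16 bytes, CCM-formatted */)
    {
            struct scatterlist sg;
            struct aead_request *req;
            int err;

            sg_init_one(&sg, buf,
                        assoclen + ptlen + crypto_aead_authsize(tfm));

            req = aead_request_alloc(tfm, GFP_KERNEL);
            if (!req)
                    return -ENOMEM;

            aead_request_set_callback(req, 0, NULL, NULL);
            aead_request_set_ad(req, assoclen);
            aead_request_set_crypt(req, &sg, &sg, ptlen, iv);

            err = crypto_aead_encrypt(req); /* tag lands after the ciphertext */

            aead_request_free(req);
            return err;
    }
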
index f7bd9bf..50d9fe1 100644 (file)
@@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
        kernel_neon_begin_partial(4);
 
        __asm__("       ld1     {v0.16b}, %[in]                 ;"
-               "       ld1     {v1.2d}, [%[key]], #16          ;"
+               "       ld1     {v1.16b}, [%[key]], #16         ;"
                "       cmp     %w[rounds], #10                 ;"
                "       bmi     0f                              ;"
                "       bne     3f                              ;"
                "       mov     v3.16b, v1.16b                  ;"
                "       b       2f                              ;"
                "0:     mov     v2.16b, v1.16b                  ;"
-               "       ld1     {v3.2d}, [%[key]], #16          ;"
+               "       ld1     {v3.16b}, [%[key]], #16         ;"
                "1:     aese    v0.16b, v2.16b                  ;"
                "       aesmc   v0.16b, v0.16b                  ;"
-               "2:     ld1     {v1.2d}, [%[key]], #16          ;"
+               "2:     ld1     {v1.16b}, [%[key]], #16         ;"
                "       aese    v0.16b, v3.16b                  ;"
                "       aesmc   v0.16b, v0.16b                  ;"
-               "3:     ld1     {v2.2d}, [%[key]], #16          ;"
+               "3:     ld1     {v2.16b}, [%[key]], #16         ;"
                "       subs    %w[rounds], %w[rounds], #3      ;"
                "       aese    v0.16b, v1.16b                  ;"
                "       aesmc   v0.16b, v0.16b                  ;"
-               "       ld1     {v3.2d}, [%[key]], #16          ;"
+               "       ld1     {v3.16b}, [%[key]], #16         ;"
                "       bpl     1b                              ;"
                "       aese    v0.16b, v2.16b                  ;"
                "       eor     v0.16b, v0.16b, v3.16b          ;"
@@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
        kernel_neon_begin_partial(4);
 
        __asm__("       ld1     {v0.16b}, %[in]                 ;"
-               "       ld1     {v1.2d}, [%[key]], #16          ;"
+               "       ld1     {v1.16b}, [%[key]], #16         ;"
                "       cmp     %w[rounds], #10                 ;"
                "       bmi     0f                              ;"
                "       bne     3f                              ;"
                "       mov     v3.16b, v1.16b                  ;"
                "       b       2f                              ;"
                "0:     mov     v2.16b, v1.16b                  ;"
-               "       ld1     {v3.2d}, [%[key]], #16          ;"
+               "       ld1     {v3.16b}, [%[key]], #16         ;"
                "1:     aesd    v0.16b, v2.16b                  ;"
                "       aesimc  v0.16b, v0.16b                  ;"
-               "2:     ld1     {v1.2d}, [%[key]], #16          ;"
+               "2:     ld1     {v1.16b}, [%[key]], #16         ;"
                "       aesd    v0.16b, v3.16b                  ;"
                "       aesimc  v0.16b, v0.16b                  ;"
-               "3:     ld1     {v2.2d}, [%[key]], #16          ;"
+               "3:     ld1     {v2.16b}, [%[key]], #16         ;"
                "       subs    %w[rounds], %w[rounds], #3      ;"
                "       aesd    v0.16b, v1.16b                  ;"
                "       aesimc  v0.16b, v0.16b                  ;"
-               "       ld1     {v3.2d}, [%[key]], #16          ;"
+               "       ld1     {v3.16b}, [%[key]], #16         ;"
                "       bpl     1b                              ;"
                "       aesd    v0.16b, v2.16b                  ;"
                "       eor     v0.16b, v0.16b, v3.16b          ;"
@@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
                u32 *rki = ctx->key_enc + (i * kwords);
                u32 *rko = rki + kwords;
 
+#ifndef CONFIG_CPU_BIG_ENDIAN
                rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+#else
+               rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+                        rki[0];
+#endif
                rko[1] = rko[0] ^ rki[1];
                rko[2] = rko[1] ^ rki[2];
                rko[3] = rko[2] ^ rki[3];
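
The #ifdef added above makes the scalar key schedule endian-aware: the round keys are later consumed by byte-wise ld1 {v.16b} loads, so on big endian each u32 holds the key bytes in reversed order, RotWord flips to a left rotate and the round constant must land in the top byte. A minimal restatement of that expression, assuming aes_sub(), ror32()/rol32() and rcon[] as used in the hunk; the helper name is illustrative only:

    /* illustrative helper, not part of the patch */
    static u32 expand_first_word(u32 prev_last, u32 prev_first, u32 rc)
    {
    #ifndef CONFIG_CPU_BIG_ENDIAN
            /* LE: byte 0 of the key stream is the low byte of the word */
            return ror32(aes_sub(prev_last), 8) ^ rc ^ prev_first;
    #else
            /* BE: byte 0 is the high byte, so the rotate and rcon swap ends */
            return rol32(aes_sub(prev_last), 8) ^ (rc << 24) ^ prev_first;
    #endif
    }
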

index 78f3cfe..b46093d 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #define AES_ENTRY(func)                ENTRY(ce_ ## func)
 #define AES_ENDPROC(func)      ENDPROC(ce_ ## func)
index 6b2aa0f..4e3f8ad 100644 (file)
@@ -11,8 +11,8 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
@@ -80,13 +80,19 @@ struct crypto_aes_xts_ctx {
        struct crypto_aes_ctx __aligned(8) key2;
 };
 
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+                              unsigned int key_len)
+{
+       return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
                       unsigned int key_len)
 {
-       struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int ret;
 
-       ret = xts_check_key(tfm, in_key, key_len);
+       ret = xts_verify_key(tfm, in_key, key_len);
        if (ret)
                return ret;
 
@@ -97,111 +103,101 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
        if (!ret)
                return 0;
 
-       tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+       crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
        return -EINVAL;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key_enc, rounds, blocks, first);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key_dec, rounds, blocks, first);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
                                first);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key_dec, rounds, blocks, walk.iv,
                                first);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
        return err;
 }
 
-static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int ctr_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+       err = skcipher_walk_virt(&walk, req, true);
 
        first = 1;
        kernel_neon_begin();
@@ -209,17 +205,14 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key_enc, rounds, blocks, walk.iv,
                                first);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
                first = 0;
-               nbytes -= blocks * AES_BLOCK_SIZE;
-               if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
-                       break;
-               err = blkcipher_walk_done(desc, &walk,
-                                         walk.nbytes % AES_BLOCK_SIZE);
        }
-       if (walk.nbytes % AES_BLOCK_SIZE) {
-               u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-               u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+       if (walk.nbytes) {
                u8 __aligned(8) tail[AES_BLOCK_SIZE];
+               unsigned int nbytes = walk.nbytes;
+               u8 *tdst = walk.dst.virt.addr;
+               u8 *tsrc = walk.src.virt.addr;
 
                /*
                 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -230,227 +223,169 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
                                blocks, walk.iv, first);
                memcpy(tdst, tail, nbytes);
-               err = blkcipher_walk_done(desc, &walk, 0);
+               err = skcipher_walk_done(&walk, 0);
        }
        kernel_neon_end();
 
        return err;
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key1.key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key1.key_enc, rounds, blocks,
                                (u8 *)ctx->key2.key_enc, walk.iv, first);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
 
        return err;
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err, first, rounds = 6 + ctx->key1.key_length / 4;
-       struct blkcipher_walk walk;
+       struct skcipher_walk walk;
        unsigned int blocks;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_neon_begin();
        for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
                aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key1.key_dec, rounds, blocks,
                                (u8 *)ctx->key2.key_enc, walk.iv, first);
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        kernel_neon_end();
 
        return err;
 }
 
-static struct crypto_alg aes_algs[] = { {
-       .cra_name               = "__ecb-aes-" MODE,
-       .cra_driver_name        = "__driver-ecb-aes-" MODE,
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = 0,
-               .setkey         = aes_setkey,
-               .encrypt        = ecb_encrypt,
-               .decrypt        = ecb_decrypt,
+static struct skcipher_alg aes_algs[] = { {
+       .base = {
+               .cra_name               = "__ecb(aes)",
+               .cra_driver_name        = "__ecb-aes-" MODE,
+               .cra_priority           = PRIO,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .setkey         = skcipher_aes_setkey,
+       .encrypt        = ecb_encrypt,
+       .decrypt        = ecb_decrypt,
 }, {
-       .cra_name               = "__cbc-aes-" MODE,
-       .cra_driver_name        = "__driver-cbc-aes-" MODE,
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = aes_setkey,
-               .encrypt        = cbc_encrypt,
-               .decrypt        = cbc_decrypt,
+       .base = {
+               .cra_name               = "__cbc(aes)",
+               .cra_driver_name        = "__cbc-aes-" MODE,
+               .cra_priority           = PRIO,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .setkey         = skcipher_aes_setkey,
+       .encrypt        = cbc_encrypt,
+       .decrypt        = cbc_decrypt,
 }, {
-       .cra_name               = "__ctr-aes-" MODE,
-       .cra_driver_name        = "__driver-ctr-aes-" MODE,
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = aes_setkey,
-               .encrypt        = ctr_encrypt,
-               .decrypt        = ctr_encrypt,
+       .base = {
+               .cra_name               = "__ctr(aes)",
+               .cra_driver_name        = "__ctr-aes-" MODE,
+               .cra_priority           = PRIO,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = 1,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
+       .min_keysize    = AES_MIN_KEY_SIZE,
+       .max_keysize    = AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .chunksize      = AES_BLOCK_SIZE,
+       .setkey         = skcipher_aes_setkey,
+       .encrypt        = ctr_encrypt,
+       .decrypt        = ctr_encrypt,
 }, {
-       .cra_name               = "__xts-aes-" MODE,
-       .cra_driver_name        = "__driver-xts-aes-" MODE,
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_xts_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_blkcipher = {
-               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = xts_set_key,
-               .encrypt        = xts_encrypt,
-               .decrypt        = xts_decrypt,
+       .base = {
+               .cra_name               = "__xts(aes)",
+               .cra_driver_name        = "__xts-aes-" MODE,
+               .cra_priority           = PRIO,
+               .cra_flags              = CRYPTO_ALG_INTERNAL,
+               .cra_blocksize          = AES_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct crypto_aes_xts_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
        },
-}, {
-       .cra_name               = "ecb(aes)",
-       .cra_driver_name        = "ecb-aes-" MODE,
-       .cra_priority           = PRIO,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = 0,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "cbc(aes)",
-       .cra_driver_name        = "cbc-aes-" MODE,
-       .cra_priority           = PRIO,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "ctr(aes)",
-       .cra_driver_name        = "ctr-aes-" MODE,
-       .cra_priority           = PRIO,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = AES_MIN_KEY_SIZE,
-               .max_keysize    = AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
-}, {
-       .cra_name               = "xts(aes)",
-       .cra_driver_name        = "xts-aes-" MODE,
-       .cra_priority           = PRIO,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_ablkcipher = {
-               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-               .ivsize         = AES_BLOCK_SIZE,
-               .setkey         = ablk_set_key,
-               .encrypt        = ablk_encrypt,
-               .decrypt        = ablk_decrypt,
-       }
+       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+       .ivsize         = AES_BLOCK_SIZE,
+       .setkey         = xts_set_key,
+       .encrypt        = xts_encrypt,
+       .decrypt        = xts_decrypt,
 } };
 
-static int __init aes_init(void)
+static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
 {
-       return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
+               simd_skcipher_free(aes_simd_algs[i]);
+
+       crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 }
 
-static void __exit aes_exit(void)
+static int __init aes_init(void)
 {
-       crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+       struct simd_skcipher_alg *simd;
+       const char *basename;
+       const char *algname;
+       const char *drvname;
+       int err;
+       int i;
+
+       err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+       if (err)
+               return err;
+
+       for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+               algname = aes_algs[i].base.cra_name + 2;
+               drvname = aes_algs[i].base.cra_driver_name + 2;
+               basename = aes_algs[i].base.cra_driver_name;
+               simd = simd_skcipher_create_compat(algname, drvname, basename);
+               err = PTR_ERR(simd);
+               if (IS_ERR(simd))
+                       goto unregister_simds;
+
+               aes_simd_algs[i] = simd;
+       }
+
+       return 0;
+
+unregister_simds:
+       aes_exit();
+       return err;
 }
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
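
After this conversion the NEON code is exposed as internal "__ecb(aes)"-style skciphers plus the ASYNC simd wrappers created in aes_init() above; callers keep using the ordinary skcipher API. A minimal usage sketch with error handling trimmed; a real caller would set a completion callback and wait for asynchronous completion:

    #include <crypto/skcipher.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    static int ctr_aes_sketch(const u8 *key, u8 *buf, unsigned int len, u8 *iv)
    {
            struct crypto_skcipher *tfm = crypto_alloc_skcipher("ctr(aes)", 0, 0);
            struct skcipher_request *req;
            struct scatterlist sg;
            int err;

            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            crypto_skcipher_setkey(tfm, key, 16);   /* AES-128 for brevity */
            req = skcipher_request_alloc(tfm, GFP_KERNEL);
            skcipher_request_set_callback(req, 0, NULL, NULL);
            sg_init_one(&sg, buf, len);
            skcipher_request_set_crypt(req, &sg, &sg, len, iv);

            err = crypto_skcipher_encrypt(req);     /* -EINPROGRESS when async */

            skcipher_request_free(req);
            crypto_free_skcipher(tfm);
            return err;
    }
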
index f6e372c..c53dbea 100644 (file)
@@ -386,7 +386,8 @@ AES_ENDPROC(aes_ctr_encrypt)
        .endm
 
 .Lxts_mul_x:
-       .word           1, 0, 0x87, 0
+CPU_LE(        .quad           1, 0x87         )
+CPU_BE(        .quad           0x87, 1         )
 
 AES_ENTRY(aes_xts_encrypt)
        FRAME_PUSH
index b93170e..85f07ea 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #define AES_ENTRY(func)                ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)      ENDPROC(neon_ ## func)
        .endm
 
        .macro          do_block, enc, in, rounds, rk, rkp, i
-       ld1             {v15.16b}, [\rk]
+       ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
 1111:  eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
        sub_bytes       \in
-       ld1             {v15.16b}, [\rkp], #16
+       ld1             {v15.4s}, [\rkp], #16
        subs            \i, \i, #1
        beq             2222f
        .if             \enc == 1
        .endm
 
        .macro          do_block_2x, enc, in0, in1, rounds, rk, rkp, i
-       ld1             {v15.16b}, [\rk]
+       ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
 1111:  eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        sub_bytes_2x    \in0, \in1
        tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
-       ld1             {v15.16b}, [\rkp], #16
+       ld1             {v15.4s}, [\rkp], #16
        subs            \i, \i, #1
        beq             2222f
        .if             \enc == 1
        .endm
 
        .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
-       ld1             {v15.16b}, [\rk]
+       ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
 1111:  eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
        tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
        tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
-       ld1             {v15.16b}, [\rkp], #16
+       ld1             {v15.4s}, [\rkp], #16
        subs            \i, \i, #1
        beq             2222f
        .if             \enc == 1
        .text
        .align          4
 .LForward_ShiftRows:
-       .byte           0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
-       .byte           0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE(        .byte           0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3  )
+CPU_LE(        .byte           0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb  )
+CPU_BE(        .byte           0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8  )
+CPU_BE(        .byte           0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0  )
 
 .LReverse_ShiftRows:
-       .byte           0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
-       .byte           0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE(        .byte           0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb  )
+CPU_LE(        .byte           0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3  )
+CPU_BE(        .byte           0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8  )
+CPU_BE(        .byte           0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0  )
 
 .LForward_Sbox:
        .byte           0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
new file mode 100644 (file)
index 0000000..18f5a84
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *           Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+       .text
+       .align          6
+       .cpu            generic+crypto+crc
+
+.Lcrc32_constants:
+       /*
+        * [(x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
+        * #define CONSTANT_R1  0x154442bd4LL
+        *
+        * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+        * #define CONSTANT_R2  0x1c6e41596LL
+        */
+       .octa           0x00000001c6e415960000000154442bd4
+
+       /*
+        * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+        * #define CONSTANT_R3  0x1751997d0LL
+        *
+        * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+        * #define CONSTANT_R4  0x0ccaa009eLL
+        */
+       .octa           0x00000000ccaa009e00000001751997d0
+
+       /*
+        * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+        * #define CONSTANT_R5  0x163cd6124LL
+        */
+       .quad           0x0000000163cd6124
+       .quad           0x00000000FFFFFFFF
+
+       /*
+        * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+        *
+        * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
+        *                                                      = 0x1F7011641LL
+        * #define CONSTANT_RU  0x1F7011641LL
+        */
+       .octa           0x00000001F701164100000001DB710641
+
+.Lcrc32c_constants:
+       .octa           0x000000009e4addf800000000740eef02
+       .octa           0x000000014cd00bd600000000f20c0dfe
+       .quad           0x00000000dd45aab8
+       .quad           0x00000000FFFFFFFF
+       .octa           0x00000000dea713f10000000105ec76f0
+
+       vCONSTANT       .req    v0
+       dCONSTANT       .req    d0
+       qCONSTANT       .req    q0
+
+       BUF             .req    x0
+       LEN             .req    x1
+       CRC             .req    x2
+
+       vzr             .req    v9
+
+       /**
+        * Calculate crc32
+        * BUF - buffer
+        * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
+        * CRC - initial crc32
+        * return crc32 result in w0
+        * uint crc32_pmull_le(unsigned char const *buffer,
+        *                     size_t len, uint crc32)
+        */
+ENTRY(crc32_pmull_le)
+       adr             x3, .Lcrc32_constants
+       b               0f
+
+ENTRY(crc32c_pmull_le)
+       adr             x3, .Lcrc32c_constants
+
+0:     bic             LEN, LEN, #15
+       ld1             {v1.16b-v4.16b}, [BUF], #0x40
+       movi            vzr.16b, #0
+       fmov            dCONSTANT, CRC
+       eor             v1.16b, v1.16b, vCONSTANT.16b
+       sub             LEN, LEN, #0x40
+       cmp             LEN, #0x40
+       b.lt            less_64
+
+       ldr             qCONSTANT, [x3]
+
+loop_64:               /* fold a full 64-byte cache line per iteration */
+       sub             LEN, LEN, #0x40
+
+       pmull2          v5.1q, v1.2d, vCONSTANT.2d
+       pmull2          v6.1q, v2.2d, vCONSTANT.2d
+       pmull2          v7.1q, v3.2d, vCONSTANT.2d
+       pmull2          v8.1q, v4.2d, vCONSTANT.2d
+
+       pmull           v1.1q, v1.1d, vCONSTANT.1d
+       pmull           v2.1q, v2.1d, vCONSTANT.1d
+       pmull           v3.1q, v3.1d, vCONSTANT.1d
+       pmull           v4.1q, v4.1d, vCONSTANT.1d
+
+       eor             v1.16b, v1.16b, v5.16b
+       ld1             {v5.16b}, [BUF], #0x10
+       eor             v2.16b, v2.16b, v6.16b
+       ld1             {v6.16b}, [BUF], #0x10
+       eor             v3.16b, v3.16b, v7.16b
+       ld1             {v7.16b}, [BUF], #0x10
+       eor             v4.16b, v4.16b, v8.16b
+       ld1             {v8.16b}, [BUF], #0x10
+
+       eor             v1.16b, v1.16b, v5.16b
+       eor             v2.16b, v2.16b, v6.16b
+       eor             v3.16b, v3.16b, v7.16b
+       eor             v4.16b, v4.16b, v8.16b
+
+       cmp             LEN, #0x40
+       b.ge            loop_64
+
+less_64:               /* fold the cache line down to 128 bits */
+       ldr             qCONSTANT, [x3, #16]
+
+       pmull2          v5.1q, v1.2d, vCONSTANT.2d
+       pmull           v1.1q, v1.1d, vCONSTANT.1d
+       eor             v1.16b, v1.16b, v5.16b
+       eor             v1.16b, v1.16b, v2.16b
+
+       pmull2          v5.1q, v1.2d, vCONSTANT.2d
+       pmull           v1.1q, v1.1d, vCONSTANT.1d
+       eor             v1.16b, v1.16b, v5.16b
+       eor             v1.16b, v1.16b, v3.16b
+
+       pmull2          v5.1q, v1.2d, vCONSTANT.2d
+       pmull           v1.1q, v1.1d, vCONSTANT.1d
+       eor             v1.16b, v1.16b, v5.16b
+       eor             v1.16b, v1.16b, v4.16b
+
+       cbz             LEN, fold_64
+
+loop_16:               /* fold the rest of the buffer into 128 bits */
+       subs            LEN, LEN, #0x10
+
+       ld1             {v2.16b}, [BUF], #0x10
+       pmull2          v5.1q, v1.2d, vCONSTANT.2d
+       pmull           v1.1q, v1.1d, vCONSTANT.1d
+       eor             v1.16b, v1.16b, v5.16b
+       eor             v1.16b, v1.16b, v2.16b
+
+       b.ne            loop_16
+
+fold_64:
+       /* perform the last 64 bit fold, also adds 32 zeroes
+        * to the input stream */
+       ext             v2.16b, v1.16b, v1.16b, #8
+       pmull2          v2.1q, v2.2d, vCONSTANT.2d
+       ext             v1.16b, v1.16b, vzr.16b, #8
+       eor             v1.16b, v1.16b, v2.16b
+
+       /* final 32-bit fold */
+       ldr             dCONSTANT, [x3, #32]
+       ldr             d3, [x3, #40]
+
+       ext             v2.16b, v1.16b, vzr.16b, #4
+       and             v1.16b, v1.16b, v3.16b
+       pmull           v1.1q, v1.1d, vCONSTANT.1d
+       eor             v1.16b, v1.16b, v2.16b
+
+       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+       ldr             qCONSTANT, [x3, #48]
+
+       and             v2.16b, v1.16b, v3.16b
+       ext             v2.16b, vzr.16b, v2.16b, #8
+       pmull2          v2.1q, v2.2d, vCONSTANT.2d
+       and             v2.16b, v2.16b, v3.16b
+       pmull           v2.1q, v2.1d, vCONSTANT.1d
+       eor             v1.16b, v1.16b, v2.16b
+       mov             w0, v1.s[1]
+
+       ret
+ENDPROC(crc32_pmull_le)
+ENDPROC(crc32c_pmull_le)
+
+       .macro          __crc32, c
+0:     subs            x2, x2, #16
+       b.mi            8f
+       ldp             x3, x4, [x1], #16
+CPU_BE(        rev             x3, x3          )
+CPU_BE(        rev             x4, x4          )
+       crc32\c\()x     w0, w0, x3
+       crc32\c\()x     w0, w0, x4
+       b.ne            0b
+       ret
+
+8:     tbz             x2, #3, 4f
+       ldr             x3, [x1], #8
+CPU_BE(        rev             x3, x3          )
+       crc32\c\()x     w0, w0, x3
+4:     tbz             x2, #2, 2f
+       ldr             w3, [x1], #4
+CPU_BE(        rev             w3, w3          )
+       crc32\c\()w     w0, w0, w3
+2:     tbz             x2, #1, 1f
+       ldrh            w3, [x1], #2
+CPU_BE(        rev16           w3, w3          )
+       crc32\c\()h     w0, w0, w3
+1:     tbz             x2, #0, 0f
+       ldrb            w3, [x1]
+       crc32\c\()b     w0, w0, w3
+0:     ret
+       .endm
+
+       .align          5
+ENTRY(crc32_armv8_le)
+       __crc32
+ENDPROC(crc32_armv8_le)
+
+       .align          5
+ENTRY(crc32c_armv8_le)
+       __crc32         c
+ENDPROC(crc32c_armv8_le)
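
The __crc32 macro above consumes 16 bytes per iteration via ldp, then handles an 8/4/2/1-byte tail selected with tbz on the low bits of the remaining length. For readability, the same dispatch in C using the ACLE CRC32 intrinsics; this is a little-endian userspace sketch (compile with -march=armv8-a+crc), so the CPU_BE byte swaps are omitted and the function name is illustrative:

    #include <arm_acle.h>           /* __crc32d/__crc32w/__crc32h/__crc32b */
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static uint32_t crc32_armv8_c(uint32_t crc, const uint8_t *p, size_t len)
    {
            /* 16 bytes per loop, as in the ldp + two crc32x path */
            while (len >= 16) {
                    uint64_t a, b;

                    memcpy(&a, p, 8);
                    memcpy(&b, p + 8, 8);
                    crc = __crc32d(crc, a);
                    crc = __crc32d(crc, b);
                    p += 16;
                    len -= 16;
            }
            /* tail: 8, 4, 2, 1 bytes chosen by the low bits of len */
            if (len & 8) {
                    uint64_t a;

                    memcpy(&a, p, 8);
                    crc = __crc32d(crc, a);
                    p += 8;
            }
            if (len & 4) {
                    uint32_t a;

                    memcpy(&a, p, 4);
                    crc = __crc32w(crc, a);
                    p += 4;
            }
            if (len & 2) {
                    uint16_t a;

                    memcpy(&a, p, 2);
                    crc = __crc32h(crc, a);
                    p += 2;
            }
            if (len & 1)
                    crc = __crc32b(crc, *p);
            return crc;
    }
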
diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c
new file mode 100644 (file)
index 0000000..8594127
--- /dev/null
@@ -0,0 +1,212 @@
+/*
+ * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+
+#define PMULL_MIN_LEN          64L     /* minimum size of buffer
+                                        * for crc32_pmull_le */
+#define SCALE_F                        16L     /* size of NEON register */
+
+asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
+asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
+
+asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
+asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
+
+static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
+static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
+
+static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *key = crypto_tfm_ctx(tfm);
+
+       *key = 0;
+       return 0;
+}
+
+static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *key = crypto_tfm_ctx(tfm);
+
+       *key = ~0;
+       return 0;
+}
+
+static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
+                             unsigned int keylen)
+{
+       u32 *mctx = crypto_shash_ctx(hash);
+
+       if (keylen != sizeof(u32)) {
+               crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       *mctx = le32_to_cpup((__le32 *)key);
+       return 0;
+}
+
+static int crc32_pmull_init(struct shash_desc *desc)
+{
+       u32 *mctx = crypto_shash_ctx(desc->tfm);
+       u32 *crc = shash_desc_ctx(desc);
+
+       *crc = *mctx;
+       return 0;
+}
+
+static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int length)
+{
+       u32 *crc = shash_desc_ctx(desc);
+       unsigned int l;
+
+       if ((u64)data % SCALE_F) {
+               l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
+
+               *crc = fallback_crc32(*crc, data, l);
+
+               data += l;
+               length -= l;
+       }
+
+       if (length >= PMULL_MIN_LEN) {
+               l = round_down(length, SCALE_F);
+
+               kernel_neon_begin_partial(10);
+               *crc = crc32_pmull_le(data, l, *crc);
+               kernel_neon_end();
+
+               data += l;
+               length -= l;
+       }
+
+       if (length > 0)
+               *crc = fallback_crc32(*crc, data, length);
+
+       return 0;
+}
+
+static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int length)
+{
+       u32 *crc = shash_desc_ctx(desc);
+       unsigned int l;
+
+       if ((u64)data % SCALE_F) {
+               l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
+
+               *crc = fallback_crc32c(*crc, data, l);
+
+               data += l;
+               length -= l;
+       }
+
+       if (length >= PMULL_MIN_LEN) {
+               l = round_down(length, SCALE_F);
+
+               kernel_neon_begin_partial(10);
+               *crc = crc32c_pmull_le(data, l, *crc);
+               kernel_neon_end();
+
+               data += l;
+               length -= l;
+       }
+
+       if (length > 0)
+               *crc = fallback_crc32c(*crc, data, length);
+
+       return 0;
+}
+
+static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *crc = shash_desc_ctx(desc);
+
+       put_unaligned_le32(*crc, out);
+       return 0;
+}
+
+static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *crc = shash_desc_ctx(desc);
+
+       put_unaligned_le32(~*crc, out);
+       return 0;
+}
+
+static struct shash_alg crc32_pmull_algs[] = { {
+       .setkey                 = crc32_pmull_setkey,
+       .init                   = crc32_pmull_init,
+       .update                 = crc32_pmull_update,
+       .final                  = crc32_pmull_final,
+       .descsize               = sizeof(u32),
+       .digestsize             = sizeof(u32),
+
+       .base.cra_ctxsize       = sizeof(u32),
+       .base.cra_init          = crc32_pmull_cra_init,
+       .base.cra_name          = "crc32",
+       .base.cra_driver_name   = "crc32-arm64-ce",
+       .base.cra_priority      = 200,
+       .base.cra_blocksize     = 1,
+       .base.cra_module        = THIS_MODULE,
+}, {
+       .setkey                 = crc32_pmull_setkey,
+       .init                   = crc32_pmull_init,
+       .update                 = crc32c_pmull_update,
+       .final                  = crc32c_pmull_final,
+       .descsize               = sizeof(u32),
+       .digestsize             = sizeof(u32),
+
+       .base.cra_ctxsize       = sizeof(u32),
+       .base.cra_init          = crc32c_pmull_cra_init,
+       .base.cra_name          = "crc32c",
+       .base.cra_driver_name   = "crc32c-arm64-ce",
+       .base.cra_priority      = 200,
+       .base.cra_blocksize     = 1,
+       .base.cra_module        = THIS_MODULE,
+} };
+
+static int __init crc32_pmull_mod_init(void)
+{
+       if (elf_hwcap & HWCAP_CRC32) {
+               fallback_crc32 = crc32_armv8_le;
+               fallback_crc32c = crc32c_armv8_le;
+       } else {
+               fallback_crc32 = crc32_le;
+               fallback_crc32c = __crc32c_le;
+       }
+
+       return crypto_register_shashes(crc32_pmull_algs,
+                                      ARRAY_SIZE(crc32_pmull_algs));
+}
+
+static void __exit crc32_pmull_mod_exit(void)
+{
+       crypto_unregister_shashes(crc32_pmull_algs,
+                                 ARRAY_SIZE(crc32_pmull_algs));
+}
+
+module_cpu_feature_match(PMULL, crc32_pmull_mod_init);
+module_exit(crc32_pmull_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
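
The shashes registered above take their seed through setkey(), which is why the algorithms carry a 4-byte context and a .setkey hook. A minimal in-kernel usage sketch, assuming the usual shash helpers and with error handling trimmed; the function name is illustrative:

    #include <crypto/hash.h>
    #include <asm/unaligned.h>

    static u32 crc32_via_shash(u32 seed, const void *data, unsigned int len)
    {
            struct crypto_shash *tfm = crypto_alloc_shash("crc32", 0, 0);
            SHASH_DESC_ON_STACK(desc, tfm);
            __le32 key = cpu_to_le32(seed);         /* setkey() reads it as LE */
            u8 out[4];

            crypto_shash_setkey(tfm, (const u8 *)&key, sizeof(key));
            desc->tfm = tfm;
            desc->flags = 0;
            crypto_shash_digest(desc, data, len, out);
            crypto_free_shash(tfm);

            return get_unaligned_le32(out);         /* final() stores the CRC LE */
    }
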
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
new file mode 100644 (file)
index 0000000..d5b5a8c
--- /dev/null
@@ -0,0 +1,392 @@
+//
+// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
+//
+// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+
+//
+// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+//
+// Copyright (c) 2013, Intel Corporation
+//
+// Authors:
+//     Erdinc Ozturk <erdinc.ozturk@intel.com>
+//     Vinodh Gopal <vinodh.gopal@intel.com>
+//     James Guilford <james.guilford@intel.com>
+//     Tim Chen <tim.c.chen@linux.intel.com>
+//
+// This software is available to you under a choice of one of two
+// licenses.  You may choose to be licensed under the terms of the GNU
+// General Public License (GPL) Version 2, available from the file
+// COPYING in the main directory of this source tree, or the
+// OpenIB.org BSD license below:
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the
+//   distribution.
+//
+// * Neither the name of the Intel Corporation nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//       Function API:
+//       UINT16 crc_t10dif_pcl(
+//               UINT16 init_crc, //initial CRC value, 16 bits
+//               const unsigned char *buf, //buffer pointer to calculate CRC on
+//               UINT64 len //buffer length in bytes (64-bit data)
+//       );
+//
+//       Reference paper titled "Fast CRC Computation for Generic
+//     Polynomials Using PCLMULQDQ Instruction"
+//       URL: http://www.intel.com/content/dam/www/public/us/en/documents
+//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+//
+//
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+       .text
+       .cpu            generic+crypto
+
+       arg1_low32      .req    w0
+       arg2            .req    x1
+       arg3            .req    x2
+
+       vzr             .req    v13
+
+ENTRY(crc_t10dif_pmull)
+       movi            vzr.16b, #0             // init zero register
+
+       // adjust the 16-bit initial_crc value, scale it to 32 bits
+       lsl             arg1_low32, arg1_low32, #16
+
+       // check if smaller than 256
+       cmp             arg3, #256
+
+       // for sizes less than 256, we can't fold 128 bytes at a time
+       b.lt            _less_than_128
+
+       // load the initial crc value
+       // crc value does not need to be byte-reflected, but it needs
+       // to be moved to the high part of the register.
+       // because data will be byte-reflected and will align with
+       // initial crc at correct place.
+       movi            v10.16b, #0
+       mov             v10.s[3], arg1_low32            // initial crc
+
+       // receive the initial 64B data, xor the initial crc value
+       ldp             q0, q1, [arg2]
+       ldp             q2, q3, [arg2, #0x20]
+       ldp             q4, q5, [arg2, #0x40]
+       ldp             q6, q7, [arg2, #0x60]
+       add             arg2, arg2, #0x80
+
+CPU_LE(        rev64           v0.16b, v0.16b                  )
+CPU_LE(        rev64           v1.16b, v1.16b                  )
+CPU_LE(        rev64           v2.16b, v2.16b                  )
+CPU_LE(        rev64           v3.16b, v3.16b                  )
+CPU_LE(        rev64           v4.16b, v4.16b                  )
+CPU_LE(        rev64           v5.16b, v5.16b                  )
+CPU_LE(        rev64           v6.16b, v6.16b                  )
+CPU_LE(        rev64           v7.16b, v7.16b                  )
+
+CPU_LE(        ext             v0.16b, v0.16b, v0.16b, #8      )
+CPU_LE(        ext             v1.16b, v1.16b, v1.16b, #8      )
+CPU_LE(        ext             v2.16b, v2.16b, v2.16b, #8      )
+CPU_LE(        ext             v3.16b, v3.16b, v3.16b, #8      )
+CPU_LE(        ext             v4.16b, v4.16b, v4.16b, #8      )
+CPU_LE(        ext             v5.16b, v5.16b, v5.16b, #8      )
+CPU_LE(        ext             v6.16b, v6.16b, v6.16b, #8      )
+CPU_LE(        ext             v7.16b, v7.16b, v7.16b, #8      )
+
+       // XOR the initial_crc value
+       eor             v0.16b, v0.16b, v10.16b
+
+       ldr             q10, rk3        // v10 has rk3 and rk4
+                                       // type of pmull instruction
+                                       // will determine which constant to use
+
+       //
+       // we subtract 256 instead of 128 to save one instruction from the loop
+       //
+       sub             arg3, arg3, #256
+
+       // at this point 128 bytes of input are held in v0-v7; the loop below
+       // folds those registers and pulls in another 128 bytes per iteration
+       // until fewer than 128 unprocessed bytes remain in the buffer
+
+
+       // fold 128 bytes at a time: each fold64 invocation below folds two of
+       // the eight vector registers v0-v7 in parallel
+_fold_64_B_loop:
+
+       .macro          fold64, reg1, reg2
+       ldp             q11, q12, [arg2], #0x20
+
+       pmull2          v8.1q, \reg1\().2d, v10.2d
+       pmull           \reg1\().1q, \reg1\().1d, v10.1d
+
+CPU_LE(        rev64           v11.16b, v11.16b                )
+CPU_LE(        rev64           v12.16b, v12.16b                )
+
+       pmull2          v9.1q, \reg2\().2d, v10.2d
+       pmull           \reg2\().1q, \reg2\().1d, v10.1d
+
+CPU_LE(        ext             v11.16b, v11.16b, v11.16b, #8   )
+CPU_LE(        ext             v12.16b, v12.16b, v12.16b, #8   )
+
+       eor             \reg1\().16b, \reg1\().16b, v8.16b
+       eor             \reg2\().16b, \reg2\().16b, v9.16b
+       eor             \reg1\().16b, \reg1\().16b, v11.16b
+       eor             \reg2\().16b, \reg2\().16b, v12.16b
+       .endm
+
+       fold64          v0, v1
+       fold64          v2, v3
+       fold64          v4, v5
+       fold64          v6, v7
+
+       subs            arg3, arg3, #128
+
+       // check if there are another 128 bytes in the buffer to fold
+       b.ge            _fold_64_B_loop
+
+       // at this point, the buffer pointer points at the last y bytes of the
+       // buffer and the 128 bytes of folded data are held in the eight vector
+       // registers v0-v7
+
+       // fold the 8 vector registers to 1 vector register with different
+       // constants
+
+       ldr             q10, rk9
+
+       .macro          fold16, reg, rk
+       pmull           v8.1q, \reg\().1d, v10.1d
+       pmull2          \reg\().1q, \reg\().2d, v10.2d
+       .ifnb           \rk
+       ldr             q10, \rk
+       .endif
+       eor             v7.16b, v7.16b, v8.16b
+       eor             v7.16b, v7.16b, \reg\().16b
+       .endm
+
+       fold16          v0, rk11
+       fold16          v1, rk13
+       fold16          v2, rk15
+       fold16          v3, rk17
+       fold16          v4, rk19
+       fold16          v5, rk1
+       fold16          v6
+
+       // add 128-16 (rather than 128) to the loop counter to save one
+       // instruction: the flags set by adds replace a separate cmp, and
+       // b.lt catches the negative result
+       adds            arg3, arg3, #(128-16)
+       b.lt            _final_reduction_for_128
+
+       // now we have 16+y bytes left to reduce: 16 bytes are in register v7
+       // and the rest is in memory. While y >= 16 we keep folding 16 bytes
+       // at a time
+
+_16B_reduction_loop:
+       pmull           v8.1q, v7.1d, v10.1d
+       pmull2          v7.1q, v7.2d, v10.2d
+       eor             v7.16b, v7.16b, v8.16b
+
+       ldr             q0, [arg2], #16
+CPU_LE(        rev64           v0.16b, v0.16b                  )
+CPU_LE(        ext             v0.16b, v0.16b, v0.16b, #8      )
+       eor             v7.16b, v7.16b, v0.16b
+       subs            arg3, arg3, #16
+
+       // the flags from the subs above replace a separate cmp instruction;
+       // b.ge loops while at least 16 more bytes remain in the buffer to fold
+       b.ge            _16B_reduction_loop
+
+       // now we have 16+z bytes left to reduce, where 0<= z < 16.
+       // first, we reduce the data in register v7
+
+_final_reduction_for_128:
+       // check if any more data to fold. If not, compute the CRC of
+       // the final 128 bits
+       adds            arg3, arg3, #16
+       b.eq            _128_done
+
+       // fewer than 16 bytes of input remain. Since data has already been
+       // consumed ahead of the current pointer, we can back the pointer up
+       // so that exactly 16 bytes can be loaded; the registers are adjusted
+       // afterwards.
+_get_last_two_regs:
+       add             arg2, arg2, arg3
+       ldr             q1, [arg2, #-16]
+CPU_LE(        rev64           v1.16b, v1.16b                  )
+CPU_LE(        ext             v1.16b, v1.16b, v1.16b, #8      )
+
+       // get rid of the extra data that was loaded before
+       // load the shift constant
+       adr             x4, tbl_shf_table + 16
+       sub             x4, x4, arg3
+       ld1             {v0.16b}, [x4]
+
+       // shift v2 to the left by arg3 bytes
+       tbl             v2.16b, {v7.16b}, v0.16b
+
+       // shift v7 to the right by 16-arg3 bytes
+       movi            v9.16b, #0x80
+       eor             v0.16b, v0.16b, v9.16b
+       tbl             v7.16b, {v7.16b}, v0.16b
+
+       // blend
+       sshr            v0.16b, v0.16b, #7      // convert to 8-bit mask
+       bsl             v0.16b, v2.16b, v1.16b
+
+       // fold 16 Bytes
+       pmull           v8.1q, v7.1d, v10.1d
+       pmull2          v7.1q, v7.2d, v10.2d
+       eor             v7.16b, v7.16b, v8.16b
+       eor             v7.16b, v7.16b, v0.16b
+
+_128_done:
+       // compute crc of a 128-bit value
+       ldr             q10, rk5                // rk5 and rk6 in v10
+
+       // 64b fold
+       ext             v0.16b, vzr.16b, v7.16b, #8
+       mov             v7.d[0], v7.d[1]
+       pmull           v7.1q, v7.1d, v10.1d
+       eor             v7.16b, v7.16b, v0.16b
+
+       // 32b fold
+       ext             v0.16b, v7.16b, vzr.16b, #4
+       mov             v7.s[3], vzr.s[0]
+       pmull2          v0.1q, v0.2d, v10.2d
+       eor             v7.16b, v7.16b, v0.16b
+
+       // barrett reduction
+_barrett:
+       ldr             q10, rk7
+       mov             v0.d[0], v7.d[1]
+
+       pmull           v0.1q, v0.1d, v10.1d
+       ext             v0.16b, vzr.16b, v0.16b, #12
+       pmull2          v0.1q, v0.2d, v10.2d
+       ext             v0.16b, vzr.16b, v0.16b, #12
+       eor             v7.16b, v7.16b, v0.16b
+       mov             w0, v7.s[1]
+
+_cleanup:
+       // scale the result back to 16 bits
+       lsr             x0, x0, #16
+       ret
+
+_less_than_128:
+       cbz             arg3, _cleanup
+
+       movi            v0.16b, #0
+       mov             v0.s[3], arg1_low32     // get the initial crc value
+
+       ldr             q7, [arg2], #0x10
+CPU_LE(        rev64           v7.16b, v7.16b                  )
+CPU_LE(        ext             v7.16b, v7.16b, v7.16b, #8      )
+       eor             v7.16b, v7.16b, v0.16b  // xor the initial crc value
+
+       cmp             arg3, #16
+       b.eq            _128_done               // exactly 16 left
+       b.lt            _less_than_16_left
+
+       ldr             q10, rk1                // rk1 and rk2 in v10
+
+       // update the counter. subtract 32 instead of 16 to save one
+       // instruction from the loop
+       subs            arg3, arg3, #32
+       b.ge            _16B_reduction_loop
+
+       add             arg3, arg3, #16
+       b               _get_last_two_regs
+
+_less_than_16_left:
+       // load the shift constant for the remaining length
+       adr             x0, tbl_shf_table + 16
+       sub             x0, x0, arg3
+       ld1             {v0.16b}, [x0]
+       movi            v9.16b, #0x80
+       eor             v0.16b, v0.16b, v9.16b
+       tbl             v7.16b, {v7.16b}, v0.16b
+       b               _128_done
+ENDPROC(crc_t10dif_pmull)
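For reference, the transform implemented above computes CRC-16/T10-DIF
(polynomial 0x8bb7, as listed in the constants below): the folding loops and
the Barrett step are just a fast way of evaluating the same remainder. A
bit-serial, host-side model can be used to cross-check the output; this is a
sketch only, and the helper name crc_t10dif_ref is illustrative (in-kernel,
crc_t10dif_generic() fills this role).

#include <stddef.h>
#include <stdint.h>

/* MSB-first CRC-16 with polynomial 0x8bb7, init passed in, no reflection/xorout */
static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *buf, size_t len)
{
	while (len--) {
		crc ^= (uint16_t)(*buf++) << 8;
		for (int i = 0; i < 8; i++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7
					     : (crc << 1);
	}
	return crc;
}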
+
+// precomputed constants
+// these constants are precomputed from the poly:
+// 0x8bb70000 (0x8bb7 scaled to 32 bits)
+       .align          4
+// Q = 0x18BB70000
+// rk1 = 2^(32*3) mod Q << 32
+// rk2 = 2^(32*5) mod Q << 32
+// rk3 = 2^(32*15) mod Q << 32
+// rk4 = 2^(32*17) mod Q << 32
+// rk5 = 2^(32*3) mod Q << 32
+// rk6 = 2^(32*2) mod Q << 32
+// rk7 = floor(2^64/Q)
+// rk8 = Q
+
+rk1:   .octa           0x06df0000000000002d56000000000000
+rk3:   .octa           0x7cf50000000000009d9d000000000000
+rk5:   .octa           0x13680000000000002d56000000000000
+rk7:   .octa           0x000000018bb7000000000001f65a57f8
+rk9:   .octa           0xbfd6000000000000ceae000000000000
+rk11:  .octa           0x713c0000000000001e16000000000000
+rk13:  .octa           0x80a6000000000000f7f9000000000000
+rk15:  .octa           0xe658000000000000044c000000000000
+rk17:  .octa           0xa497000000000000ad18000000000000
+rk19:  .octa           0xe7b50000000000006ee3000000000000
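Taking the formulas in the comment block above at face value, the rk folding
constants can be reproduced with carry-less (GF(2)[x]) arithmetic, where
"2^n mod Q" means x^n reduced by Q(x) = 0x18BB70000. The host-side sketch
below is illustrative only; rk7/rk8 (the Barrett pair) additionally need a
carry-less division and are not shown.

#include <stdint.h>
#include <stdio.h>

/* x^n mod q(x) over GF(2), where q(x) has degree 'deg' (bit 'deg' set) */
static uint64_t xpow_mod(unsigned int n, uint64_t q, unsigned int deg)
{
	uint64_t r = 1;			/* the polynomial "1" */

	while (n--) {
		r <<= 1;		/* multiply by x */
		if (r & (1ULL << deg))
			r ^= q;		/* reduce modulo q(x) */
	}
	return r;
}

int main(void)
{
	const uint64_t q = 0x18bb70000ULL;	/* Q from the comment above */

	/* rk1 = 2^(32*3) mod Q << 32, rk2 = 2^(32*5) mod Q << 32, ... */
	printf("rk1 = 0x%016llx\n",
	       (unsigned long long)(xpow_mod(32 * 3, q, 32) << 32));
	printf("rk2 = 0x%016llx\n",
	       (unsigned long long)(xpow_mod(32 * 5, q, 32) << 32));
	return 0;
}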
+
+tbl_shf_table:
+// use these values for shift constants for the tbl/tbx instruction
+// different alignments result in values as shown:
+//     DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+//     DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
+//     DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
+//     DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+//     DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+//     DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+//     DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
+//     DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
+//     DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
+//     DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
+//     DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
+//     DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
+//     DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
+//     DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
+//     DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
+
+       .byte            0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+       .byte           0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+       .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+       .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
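The table above is consumed by the tbl instructions in _less_than_16_left and
_get_last_two_regs; the property being exploited is that AArch64 TBL yields
zero for any index byte that is out of range (top bit set). Loading the mask
from tbl_shf_table + 16 - n therefore shifts v7 by n bytes, and flipping the
top bits with the 0x80 eor selects the complementary shift used for the
blend. A tiny C model of that lookup (sketch only; tbl16 is an illustrative
name):

#include <stdint.h>

/* model of a single-register AArch64 TBL: out-of-range indices give 0 */
static void tbl16(uint8_t out[16], const uint8_t tbl[16],
		  const uint8_t idx[16])
{
	for (int i = 0; i < 16; i++)
		out[i] = (idx[i] < 16) ? tbl[idx[i]] : 0;
}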
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
new file mode 100644 (file)
index 0000000..60cb590
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc-t10dif.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/neon.h>
+
+#define CRC_T10DIF_PMULL_CHUNK_SIZE    16U
+
+asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+
+static int crct10dif_init(struct shash_desc *desc)
+{
+       u16 *crc = shash_desc_ctx(desc);
+
+       *crc = 0;
+       return 0;
+}
+
+static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+                           unsigned int length)
+{
+       u16 *crc = shash_desc_ctx(desc);
+       unsigned int l;
+
+       if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
+               l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
+                         ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+
+               *crc = crc_t10dif_generic(*crc, data, l);
+
+               length -= l;
+               data += l;
+       }
+
+       if (length > 0) {
+               kernel_neon_begin_partial(14);
+               *crc = crc_t10dif_pmull(*crc, data, length);
+               kernel_neon_end();
+       }
+
+       return 0;
+}
+
+static int crct10dif_final(struct shash_desc *desc, u8 *out)
+{
+       u16 *crc = shash_desc_ctx(desc);
+
+       *(u16 *)out = *crc;
+       return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+       .digestsize             = CRC_T10DIF_DIGEST_SIZE,
+       .init                   = crct10dif_init,
+       .update                 = crct10dif_update,
+       .final                  = crct10dif_final,
+       .descsize               = CRC_T10DIF_DIGEST_SIZE,
+
+       .base.cra_name          = "crct10dif",
+       .base.cra_driver_name   = "crct10dif-arm64-ce",
+       .base.cra_priority      = 200,
+       .base.cra_blocksize     = CRC_T10DIF_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+       return crypto_register_shash(&crc_t10dif_alg);
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+       crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
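The module above only registers the shash; in-tree users normally reach it
through crc_t10dif() in lib/crc-t10dif.c, which allocates the "crct10dif"
transform behind the scenes. As a minimal sketch of what such a kernel-side
caller looks like (the function name here is illustrative and error handling
is abbreviated):

#include <crypto/hash.h>
#include <linux/err.h>

static u16 crct10dif_via_shash(const u8 *data, unsigned int len)
{
	struct crypto_shash *tfm;
	u16 crc = 0;

	/* picks the highest-priority "crct10dif" implementation, e.g. this one */
	tfm = crypto_alloc_shash("crct10dif", 0, 0);
	if (IS_ERR(tfm))
		return 0;

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* no CRYPTO_TFM_REQ_MAY_SLEEP */
		if (crypto_shash_digest(desc, data, len, (u8 *)&crc))
			crc = 0;
	}

	crypto_free_shash(tfm);
	return crc;
}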
index dc45701..f0bb9f0 100644 (file)
@@ -29,8 +29,8 @@
         *                         struct ghash_key const *k, const char *head)
         */
 ENTRY(pmull_ghash_update)
-       ld1             {SHASH.16b}, [x3]
-       ld1             {XL.16b}, [x1]
+       ld1             {SHASH.2d}, [x3]
+       ld1             {XL.2d}, [x1]
        movi            MASK.16b, #0xe1
        ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8
        shl             MASK.2d, MASK.2d, #57
@@ -74,6 +74,6 @@ CPU_LE(       rev64           T1.16b, T1.16b  )
 
        cbnz            w0, 0b
 
-       st1             {XL.16b}, [x1]
+       st1             {XL.2d}, [x1]
        ret
 ENDPROC(pmull_ghash_update)
index 033aae6..c98e7e8 100644 (file)
@@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform)
        ld1r            {k3.4s}, [x6]
 
        /* load state */
-       ldr             dga, [x0]
+       ld1             {dgav.4s}, [x0]
        ldr             dgb, [x0, #16]
 
        /* load sha1_ce_state::finalize */
@@ -144,7 +144,7 @@ CPU_LE(     rev32           v11.16b, v11.16b        )
        b               1b
 
        /* store new state */
-3:     str             dga, [x0]
+3:     st1             {dgav.4s}, [x0]
        str             dgb, [x0, #16]
        ret
 ENDPROC(sha1_ce_transform)
index 5df9d9d..01cfee0 100644 (file)
@@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform)
        ld1             {v12.4s-v15.4s}, [x8]
 
        /* load state */
-       ldp             dga, dgb, [x0]
+       ld1             {dgav.4s, dgbv.4s}, [x0]
 
        /* load sha256_ce_state::finalize */
        ldr             w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
@@ -148,6 +148,6 @@ CPU_LE(     rev32           v19.16b, v19.16b        )
        b               1b
 
        /* store new state */
-3:     stp             dga, dgb, [x0]
+3:     st1             {dgav.4s, dgbv.4s}, [x0]
        ret
 ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
new file mode 100644 (file)
index 0000000..3ce82cc
--- /dev/null
@@ -0,0 +1,2061 @@
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+//             SHA256-hw       SHA256(*)       SHA512
+// Apple A7    1.97            10.5 (+33%)     6.73 (-1%(**))
+// Cortex-A53  2.38            15.5 (+115%)    10.0 (+150%(***))
+// Cortex-A57  2.31            11.6 (+86%)     7.51 (+260%(***))
+// Denver      2.01            10.5 (+26%)     6.70 (+8%)
+// X-Gene                      20.0 (+100%)    12.8 (+300%(***))
+// Mongoose    2.36            13.0 (+50%)     8.36 (+33%)
+//
+// (*) Software SHA256 results are of lesser relevance, presented
+//     mostly for informational purposes.
+// (**)        The result is a trade-off: it's possible to improve it by
+//     10% (or by 1 cycle per round), but at the cost of 20% loss
+//     on Cortex-A53 (or by 4 cycles per round).
+// (***)       Super-impressive coefficients over gcc-generated code are
+//     an indication of some compiler "pathology", most notably code
+//     generated with -mgeneral-regs-only is significantly faster
+//     and the gap is only 40-90%.
+//
+// October 2016.
+//
+// Originally it was reckoned that it makes no sense to implement a NEON
+// version of SHA256 for 64-bit processors, because the performance
+// improvement on the most widespread Cortex-A5x processors was observed
+// to be marginal (no change on Cortex-A53 and ~10% on A57). But then it was
+// observed that 32-bit NEON SHA256 performs significantly better than
+// the 64-bit scalar version on *some* of the more recent processors. As a
+// result, a 64-bit NEON version of SHA256 was added to provide the best
+// all-round performance. For example it executes ~30% faster on X-Gene
+// and Mongoose. [For reference, the NEON version of SHA512 is bound to
+// deliver much less improvement, likely *negative* on Cortex-A5x,
+// which is why NEON support is limited to SHA256.]
+
+#ifndef        __KERNEL__
+# include "arm_arch.h"
+#endif
+
+.text
+
+.extern        OPENSSL_armcap_P
+.globl sha256_block_data_order
+.type  sha256_block_data_order,%function
+.align 6
+sha256_block_data_order:
+#ifndef        __KERNEL__
+# ifdef        __ILP32__
+       ldrsw   x16,.LOPENSSL_armcap_P
+# else
+       ldr     x16,.LOPENSSL_armcap_P
+# endif
+       adr     x17,.LOPENSSL_armcap_P
+       add     x16,x16,x17
+       ldr     w16,[x16]
+       tst     w16,#ARMV8_SHA256
+       b.ne    .Lv8_entry
+       tst     w16,#ARMV7_NEON
+       b.ne    .Lneon_entry
+#endif
+       stp     x29,x30,[sp,#-128]!
+       add     x29,sp,#0
+
+       stp     x19,x20,[sp,#16]
+       stp     x21,x22,[sp,#32]
+       stp     x23,x24,[sp,#48]
+       stp     x25,x26,[sp,#64]
+       stp     x27,x28,[sp,#80]
+       sub     sp,sp,#4*4
+
+       ldp     w20,w21,[x0]                            // load context
+       ldp     w22,w23,[x0,#2*4]
+       ldp     w24,w25,[x0,#4*4]
+       add     x2,x1,x2,lsl#6  // end of input
+       ldp     w26,w27,[x0,#6*4]
+       adr     x30,.LK256
+       stp     x0,x2,[x29,#96]
+
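The per-round annotations below (Ch, Maj, Sigma0/Sigma1 for the rounds,
sigma0/sigma1 for the message schedule) are the FIPS 180-4 round functions;
a..h live in w20-w27. As a reference, a compact C model of one scalar round
is sketched here (helper names are illustrative; the interleaved message
schedule and the register rotation tricks used by the assembly are not
modelled). The rotation counts match the ror amounts visible in the code,
e.g. ror #6/#11/#25 for Sigma1 and ror #2/#13/#22 for Sigma0.

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

#define Ch(e, f, g)	(((e) & (f)) ^ (~(e) & (g)))
#define Maj(a, b, c)	(((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c)))
#define Sigma0(a)	(ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22))
#define Sigma1(e)	(ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25))
#define sigma0(x)	(ror32(x, 7) ^ ror32(x, 18) ^ ((x) >> 3))
#define sigma1(x)	(ror32(x, 17) ^ ror32(x, 19) ^ ((x) >> 10))

/* one round: s[0..7] = a..h, consuming round constant k and schedule word x */
static void sha256_round(uint32_t s[8], uint32_t k, uint32_t x)
{
	uint32_t t1 = s[7] + Sigma1(s[4]) + Ch(s[4], s[5], s[6]) + k + x;
	uint32_t t2 = Sigma0(s[0]) + Maj(s[0], s[1], s[2]);

	s[7] = s[6]; s[6] = s[5]; s[5] = s[4];
	s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0];
	s[0] = t1 + t2;
}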
+.Loop:
+       ldp     w3,w4,[x1],#2*4
+       ldr     w19,[x30],#4                    // *K++
+       eor     w28,w21,w22                             // magic seed
+       str     x1,[x29,#112]
+#ifndef        __AARCH64EB__
+       rev     w3,w3                   // 0
+#endif
+       ror     w16,w24,#6
+       add     w27,w27,w19                     // h+=K[i]
+       eor     w6,w24,w24,ror#14
+       and     w17,w25,w24
+       bic     w19,w26,w24
+       add     w27,w27,w3                      // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w20,w21                     // a^b, b^c in next round
+       eor     w16,w16,w6,ror#11       // Sigma1(e)
+       ror     w6,w20,#2
+       add     w27,w27,w17                     // h+=Ch(e,f,g)
+       eor     w17,w20,w20,ror#9
+       add     w27,w27,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w23,w23,w27                     // d+=h
+       eor     w28,w28,w21                     // Maj(a,b,c)
+       eor     w17,w6,w17,ror#13       // Sigma0(a)
+       add     w27,w27,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w27,w27,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w4,w4                   // 1
+#endif
+       ldp     w5,w6,[x1],#2*4
+       add     w27,w27,w17                     // h+=Sigma0(a)
+       ror     w16,w23,#6
+       add     w26,w26,w28                     // h+=K[i]
+       eor     w7,w23,w23,ror#14
+       and     w17,w24,w23
+       bic     w28,w25,w23
+       add     w26,w26,w4                      // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w27,w20                     // a^b, b^c in next round
+       eor     w16,w16,w7,ror#11       // Sigma1(e)
+       ror     w7,w27,#2
+       add     w26,w26,w17                     // h+=Ch(e,f,g)
+       eor     w17,w27,w27,ror#9
+       add     w26,w26,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w22,w22,w26                     // d+=h
+       eor     w19,w19,w20                     // Maj(a,b,c)
+       eor     w17,w7,w17,ror#13       // Sigma0(a)
+       add     w26,w26,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w26,w26,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w5,w5                   // 2
+#endif
+       add     w26,w26,w17                     // h+=Sigma0(a)
+       ror     w16,w22,#6
+       add     w25,w25,w19                     // h+=K[i]
+       eor     w8,w22,w22,ror#14
+       and     w17,w23,w22
+       bic     w19,w24,w22
+       add     w25,w25,w5                      // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w26,w27                     // a^b, b^c in next round
+       eor     w16,w16,w8,ror#11       // Sigma1(e)
+       ror     w8,w26,#2
+       add     w25,w25,w17                     // h+=Ch(e,f,g)
+       eor     w17,w26,w26,ror#9
+       add     w25,w25,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w21,w21,w25                     // d+=h
+       eor     w28,w28,w27                     // Maj(a,b,c)
+       eor     w17,w8,w17,ror#13       // Sigma0(a)
+       add     w25,w25,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w25,w25,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w6,w6                   // 3
+#endif
+       ldp     w7,w8,[x1],#2*4
+       add     w25,w25,w17                     // h+=Sigma0(a)
+       ror     w16,w21,#6
+       add     w24,w24,w28                     // h+=K[i]
+       eor     w9,w21,w21,ror#14
+       and     w17,w22,w21
+       bic     w28,w23,w21
+       add     w24,w24,w6                      // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w25,w26                     // a^b, b^c in next round
+       eor     w16,w16,w9,ror#11       // Sigma1(e)
+       ror     w9,w25,#2
+       add     w24,w24,w17                     // h+=Ch(e,f,g)
+       eor     w17,w25,w25,ror#9
+       add     w24,w24,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w20,w20,w24                     // d+=h
+       eor     w19,w19,w26                     // Maj(a,b,c)
+       eor     w17,w9,w17,ror#13       // Sigma0(a)
+       add     w24,w24,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w24,w24,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w7,w7                   // 4
+#endif
+       add     w24,w24,w17                     // h+=Sigma0(a)
+       ror     w16,w20,#6
+       add     w23,w23,w19                     // h+=K[i]
+       eor     w10,w20,w20,ror#14
+       and     w17,w21,w20
+       bic     w19,w22,w20
+       add     w23,w23,w7                      // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w24,w25                     // a^b, b^c in next round
+       eor     w16,w16,w10,ror#11      // Sigma1(e)
+       ror     w10,w24,#2
+       add     w23,w23,w17                     // h+=Ch(e,f,g)
+       eor     w17,w24,w24,ror#9
+       add     w23,w23,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w27,w27,w23                     // d+=h
+       eor     w28,w28,w25                     // Maj(a,b,c)
+       eor     w17,w10,w17,ror#13      // Sigma0(a)
+       add     w23,w23,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w23,w23,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w8,w8                   // 5
+#endif
+       ldp     w9,w10,[x1],#2*4
+       add     w23,w23,w17                     // h+=Sigma0(a)
+       ror     w16,w27,#6
+       add     w22,w22,w28                     // h+=K[i]
+       eor     w11,w27,w27,ror#14
+       and     w17,w20,w27
+       bic     w28,w21,w27
+       add     w22,w22,w8                      // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w23,w24                     // a^b, b^c in next round
+       eor     w16,w16,w11,ror#11      // Sigma1(e)
+       ror     w11,w23,#2
+       add     w22,w22,w17                     // h+=Ch(e,f,g)
+       eor     w17,w23,w23,ror#9
+       add     w22,w22,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w26,w26,w22                     // d+=h
+       eor     w19,w19,w24                     // Maj(a,b,c)
+       eor     w17,w11,w17,ror#13      // Sigma0(a)
+       add     w22,w22,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w22,w22,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w9,w9                   // 6
+#endif
+       add     w22,w22,w17                     // h+=Sigma0(a)
+       ror     w16,w26,#6
+       add     w21,w21,w19                     // h+=K[i]
+       eor     w12,w26,w26,ror#14
+       and     w17,w27,w26
+       bic     w19,w20,w26
+       add     w21,w21,w9                      // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w22,w23                     // a^b, b^c in next round
+       eor     w16,w16,w12,ror#11      // Sigma1(e)
+       ror     w12,w22,#2
+       add     w21,w21,w17                     // h+=Ch(e,f,g)
+       eor     w17,w22,w22,ror#9
+       add     w21,w21,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w25,w25,w21                     // d+=h
+       eor     w28,w28,w23                     // Maj(a,b,c)
+       eor     w17,w12,w17,ror#13      // Sigma0(a)
+       add     w21,w21,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w21,w21,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w10,w10                 // 7
+#endif
+       ldp     w11,w12,[x1],#2*4
+       add     w21,w21,w17                     // h+=Sigma0(a)
+       ror     w16,w25,#6
+       add     w20,w20,w28                     // h+=K[i]
+       eor     w13,w25,w25,ror#14
+       and     w17,w26,w25
+       bic     w28,w27,w25
+       add     w20,w20,w10                     // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w21,w22                     // a^b, b^c in next round
+       eor     w16,w16,w13,ror#11      // Sigma1(e)
+       ror     w13,w21,#2
+       add     w20,w20,w17                     // h+=Ch(e,f,g)
+       eor     w17,w21,w21,ror#9
+       add     w20,w20,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w24,w24,w20                     // d+=h
+       eor     w19,w19,w22                     // Maj(a,b,c)
+       eor     w17,w13,w17,ror#13      // Sigma0(a)
+       add     w20,w20,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w20,w20,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w11,w11                 // 8
+#endif
+       add     w20,w20,w17                     // h+=Sigma0(a)
+       ror     w16,w24,#6
+       add     w27,w27,w19                     // h+=K[i]
+       eor     w14,w24,w24,ror#14
+       and     w17,w25,w24
+       bic     w19,w26,w24
+       add     w27,w27,w11                     // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w20,w21                     // a^b, b^c in next round
+       eor     w16,w16,w14,ror#11      // Sigma1(e)
+       ror     w14,w20,#2
+       add     w27,w27,w17                     // h+=Ch(e,f,g)
+       eor     w17,w20,w20,ror#9
+       add     w27,w27,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w23,w23,w27                     // d+=h
+       eor     w28,w28,w21                     // Maj(a,b,c)
+       eor     w17,w14,w17,ror#13      // Sigma0(a)
+       add     w27,w27,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w27,w27,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w12,w12                 // 9
+#endif
+       ldp     w13,w14,[x1],#2*4
+       add     w27,w27,w17                     // h+=Sigma0(a)
+       ror     w16,w23,#6
+       add     w26,w26,w28                     // h+=K[i]
+       eor     w15,w23,w23,ror#14
+       and     w17,w24,w23
+       bic     w28,w25,w23
+       add     w26,w26,w12                     // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w27,w20                     // a^b, b^c in next round
+       eor     w16,w16,w15,ror#11      // Sigma1(e)
+       ror     w15,w27,#2
+       add     w26,w26,w17                     // h+=Ch(e,f,g)
+       eor     w17,w27,w27,ror#9
+       add     w26,w26,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w22,w22,w26                     // d+=h
+       eor     w19,w19,w20                     // Maj(a,b,c)
+       eor     w17,w15,w17,ror#13      // Sigma0(a)
+       add     w26,w26,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w26,w26,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w13,w13                 // 10
+#endif
+       add     w26,w26,w17                     // h+=Sigma0(a)
+       ror     w16,w22,#6
+       add     w25,w25,w19                     // h+=K[i]
+       eor     w0,w22,w22,ror#14
+       and     w17,w23,w22
+       bic     w19,w24,w22
+       add     w25,w25,w13                     // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w26,w27                     // a^b, b^c in next round
+       eor     w16,w16,w0,ror#11       // Sigma1(e)
+       ror     w0,w26,#2
+       add     w25,w25,w17                     // h+=Ch(e,f,g)
+       eor     w17,w26,w26,ror#9
+       add     w25,w25,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w21,w21,w25                     // d+=h
+       eor     w28,w28,w27                     // Maj(a,b,c)
+       eor     w17,w0,w17,ror#13       // Sigma0(a)
+       add     w25,w25,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w25,w25,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w14,w14                 // 11
+#endif
+       ldp     w15,w0,[x1],#2*4
+       add     w25,w25,w17                     // h+=Sigma0(a)
+       str     w6,[sp,#12]
+       ror     w16,w21,#6
+       add     w24,w24,w28                     // h+=K[i]
+       eor     w6,w21,w21,ror#14
+       and     w17,w22,w21
+       bic     w28,w23,w21
+       add     w24,w24,w14                     // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w25,w26                     // a^b, b^c in next round
+       eor     w16,w16,w6,ror#11       // Sigma1(e)
+       ror     w6,w25,#2
+       add     w24,w24,w17                     // h+=Ch(e,f,g)
+       eor     w17,w25,w25,ror#9
+       add     w24,w24,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w20,w20,w24                     // d+=h
+       eor     w19,w19,w26                     // Maj(a,b,c)
+       eor     w17,w6,w17,ror#13       // Sigma0(a)
+       add     w24,w24,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w24,w24,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w15,w15                 // 12
+#endif
+       add     w24,w24,w17                     // h+=Sigma0(a)
+       str     w7,[sp,#0]
+       ror     w16,w20,#6
+       add     w23,w23,w19                     // h+=K[i]
+       eor     w7,w20,w20,ror#14
+       and     w17,w21,w20
+       bic     w19,w22,w20
+       add     w23,w23,w15                     // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w24,w25                     // a^b, b^c in next round
+       eor     w16,w16,w7,ror#11       // Sigma1(e)
+       ror     w7,w24,#2
+       add     w23,w23,w17                     // h+=Ch(e,f,g)
+       eor     w17,w24,w24,ror#9
+       add     w23,w23,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w27,w27,w23                     // d+=h
+       eor     w28,w28,w25                     // Maj(a,b,c)
+       eor     w17,w7,w17,ror#13       // Sigma0(a)
+       add     w23,w23,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w23,w23,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w0,w0                   // 13
+#endif
+       ldp     w1,w2,[x1]
+       add     w23,w23,w17                     // h+=Sigma0(a)
+       str     w8,[sp,#4]
+       ror     w16,w27,#6
+       add     w22,w22,w28                     // h+=K[i]
+       eor     w8,w27,w27,ror#14
+       and     w17,w20,w27
+       bic     w28,w21,w27
+       add     w22,w22,w0                      // h+=X[i]
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w23,w24                     // a^b, b^c in next round
+       eor     w16,w16,w8,ror#11       // Sigma1(e)
+       ror     w8,w23,#2
+       add     w22,w22,w17                     // h+=Ch(e,f,g)
+       eor     w17,w23,w23,ror#9
+       add     w22,w22,w16                     // h+=Sigma1(e)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       add     w26,w26,w22                     // d+=h
+       eor     w19,w19,w24                     // Maj(a,b,c)
+       eor     w17,w8,w17,ror#13       // Sigma0(a)
+       add     w22,w22,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       //add   w22,w22,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w1,w1                   // 14
+#endif
+       ldr     w6,[sp,#12]
+       add     w22,w22,w17                     // h+=Sigma0(a)
+       str     w9,[sp,#8]
+       ror     w16,w26,#6
+       add     w21,w21,w19                     // h+=K[i]
+       eor     w9,w26,w26,ror#14
+       and     w17,w27,w26
+       bic     w19,w20,w26
+       add     w21,w21,w1                      // h+=X[i]
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w22,w23                     // a^b, b^c in next round
+       eor     w16,w16,w9,ror#11       // Sigma1(e)
+       ror     w9,w22,#2
+       add     w21,w21,w17                     // h+=Ch(e,f,g)
+       eor     w17,w22,w22,ror#9
+       add     w21,w21,w16                     // h+=Sigma1(e)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       add     w25,w25,w21                     // d+=h
+       eor     w28,w28,w23                     // Maj(a,b,c)
+       eor     w17,w9,w17,ror#13       // Sigma0(a)
+       add     w21,w21,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       //add   w21,w21,w17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     w2,w2                   // 15
+#endif
+       ldr     w7,[sp,#0]
+       add     w21,w21,w17                     // h+=Sigma0(a)
+       str     w10,[sp,#12]
+       ror     w16,w25,#6
+       add     w20,w20,w28                     // h+=K[i]
+       ror     w9,w4,#7
+       and     w17,w26,w25
+       ror     w8,w1,#17
+       bic     w28,w27,w25
+       ror     w10,w21,#2
+       add     w20,w20,w2                      // h+=X[i]
+       eor     w16,w16,w25,ror#11
+       eor     w9,w9,w4,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w21,w22                     // a^b, b^c in next round
+       eor     w16,w16,w25,ror#25      // Sigma1(e)
+       eor     w10,w10,w21,ror#13
+       add     w20,w20,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w8,w8,w1,ror#19
+       eor     w9,w9,w4,lsr#3  // sigma0(X[i+1])
+       add     w20,w20,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w22                     // Maj(a,b,c)
+       eor     w17,w10,w21,ror#22      // Sigma0(a)
+       eor     w8,w8,w1,lsr#10 // sigma1(X[i+14])
+       add     w3,w3,w12
+       add     w24,w24,w20                     // d+=h
+       add     w20,w20,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w3,w3,w9
+       add     w20,w20,w17                     // h+=Sigma0(a)
+       add     w3,w3,w8
+.Loop_16_xx:
+       ldr     w8,[sp,#4]
+       str     w11,[sp,#0]
+       ror     w16,w24,#6
+       add     w27,w27,w19                     // h+=K[i]
+       ror     w10,w5,#7
+       and     w17,w25,w24
+       ror     w9,w2,#17
+       bic     w19,w26,w24
+       ror     w11,w20,#2
+       add     w27,w27,w3                      // h+=X[i]
+       eor     w16,w16,w24,ror#11
+       eor     w10,w10,w5,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w20,w21                     // a^b, b^c in next round
+       eor     w16,w16,w24,ror#25      // Sigma1(e)
+       eor     w11,w11,w20,ror#13
+       add     w27,w27,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w9,w9,w2,ror#19
+       eor     w10,w10,w5,lsr#3        // sigma0(X[i+1])
+       add     w27,w27,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w21                     // Maj(a,b,c)
+       eor     w17,w11,w20,ror#22      // Sigma0(a)
+       eor     w9,w9,w2,lsr#10 // sigma1(X[i+14])
+       add     w4,w4,w13
+       add     w23,w23,w27                     // d+=h
+       add     w27,w27,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w4,w4,w10
+       add     w27,w27,w17                     // h+=Sigma0(a)
+       add     w4,w4,w9
+       ldr     w9,[sp,#8]
+       str     w12,[sp,#4]
+       ror     w16,w23,#6
+       add     w26,w26,w28                     // h+=K[i]
+       ror     w11,w6,#7
+       and     w17,w24,w23
+       ror     w10,w3,#17
+       bic     w28,w25,w23
+       ror     w12,w27,#2
+       add     w26,w26,w4                      // h+=X[i]
+       eor     w16,w16,w23,ror#11
+       eor     w11,w11,w6,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w27,w20                     // a^b, b^c in next round
+       eor     w16,w16,w23,ror#25      // Sigma1(e)
+       eor     w12,w12,w27,ror#13
+       add     w26,w26,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w10,w10,w3,ror#19
+       eor     w11,w11,w6,lsr#3        // sigma0(X[i+1])
+       add     w26,w26,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w20                     // Maj(a,b,c)
+       eor     w17,w12,w27,ror#22      // Sigma0(a)
+       eor     w10,w10,w3,lsr#10       // sigma1(X[i+14])
+       add     w5,w5,w14
+       add     w22,w22,w26                     // d+=h
+       add     w26,w26,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w5,w5,w11
+       add     w26,w26,w17                     // h+=Sigma0(a)
+       add     w5,w5,w10
+       ldr     w10,[sp,#12]
+       str     w13,[sp,#8]
+       ror     w16,w22,#6
+       add     w25,w25,w19                     // h+=K[i]
+       ror     w12,w7,#7
+       and     w17,w23,w22
+       ror     w11,w4,#17
+       bic     w19,w24,w22
+       ror     w13,w26,#2
+       add     w25,w25,w5                      // h+=X[i]
+       eor     w16,w16,w22,ror#11
+       eor     w12,w12,w7,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w26,w27                     // a^b, b^c in next round
+       eor     w16,w16,w22,ror#25      // Sigma1(e)
+       eor     w13,w13,w26,ror#13
+       add     w25,w25,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w11,w11,w4,ror#19
+       eor     w12,w12,w7,lsr#3        // sigma0(X[i+1])
+       add     w25,w25,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w27                     // Maj(a,b,c)
+       eor     w17,w13,w26,ror#22      // Sigma0(a)
+       eor     w11,w11,w4,lsr#10       // sigma1(X[i+14])
+       add     w6,w6,w15
+       add     w21,w21,w25                     // d+=h
+       add     w25,w25,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w6,w6,w12
+       add     w25,w25,w17                     // h+=Sigma0(a)
+       add     w6,w6,w11
+       ldr     w11,[sp,#0]
+       str     w14,[sp,#12]
+       ror     w16,w21,#6
+       add     w24,w24,w28                     // h+=K[i]
+       ror     w13,w8,#7
+       and     w17,w22,w21
+       ror     w12,w5,#17
+       bic     w28,w23,w21
+       ror     w14,w25,#2
+       add     w24,w24,w6                      // h+=X[i]
+       eor     w16,w16,w21,ror#11
+       eor     w13,w13,w8,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w25,w26                     // a^b, b^c in next round
+       eor     w16,w16,w21,ror#25      // Sigma1(e)
+       eor     w14,w14,w25,ror#13
+       add     w24,w24,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w12,w12,w5,ror#19
+       eor     w13,w13,w8,lsr#3        // sigma0(X[i+1])
+       add     w24,w24,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w26                     // Maj(a,b,c)
+       eor     w17,w14,w25,ror#22      // Sigma0(a)
+       eor     w12,w12,w5,lsr#10       // sigma1(X[i+14])
+       add     w7,w7,w0
+       add     w20,w20,w24                     // d+=h
+       add     w24,w24,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w7,w7,w13
+       add     w24,w24,w17                     // h+=Sigma0(a)
+       add     w7,w7,w12
+       ldr     w12,[sp,#4]
+       str     w15,[sp,#0]
+       ror     w16,w20,#6
+       add     w23,w23,w19                     // h+=K[i]
+       ror     w14,w9,#7
+       and     w17,w21,w20
+       ror     w13,w6,#17
+       bic     w19,w22,w20
+       ror     w15,w24,#2
+       add     w23,w23,w7                      // h+=X[i]
+       eor     w16,w16,w20,ror#11
+       eor     w14,w14,w9,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w24,w25                     // a^b, b^c in next round
+       eor     w16,w16,w20,ror#25      // Sigma1(e)
+       eor     w15,w15,w24,ror#13
+       add     w23,w23,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w13,w13,w6,ror#19
+       eor     w14,w14,w9,lsr#3        // sigma0(X[i+1])
+       add     w23,w23,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w25                     // Maj(a,b,c)
+       eor     w17,w15,w24,ror#22      // Sigma0(a)
+       eor     w13,w13,w6,lsr#10       // sigma1(X[i+14])
+       add     w8,w8,w1
+       add     w27,w27,w23                     // d+=h
+       add     w23,w23,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w8,w8,w14
+       add     w23,w23,w17                     // h+=Sigma0(a)
+       add     w8,w8,w13
+       ldr     w13,[sp,#8]
+       str     w0,[sp,#4]
+       ror     w16,w27,#6
+       add     w22,w22,w28                     // h+=K[i]
+       ror     w15,w10,#7
+       and     w17,w20,w27
+       ror     w14,w7,#17
+       bic     w28,w21,w27
+       ror     w0,w23,#2
+       add     w22,w22,w8                      // h+=X[i]
+       eor     w16,w16,w27,ror#11
+       eor     w15,w15,w10,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w23,w24                     // a^b, b^c in next round
+       eor     w16,w16,w27,ror#25      // Sigma1(e)
+       eor     w0,w0,w23,ror#13
+       add     w22,w22,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w14,w14,w7,ror#19
+       eor     w15,w15,w10,lsr#3       // sigma0(X[i+1])
+       add     w22,w22,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w24                     // Maj(a,b,c)
+       eor     w17,w0,w23,ror#22       // Sigma0(a)
+       eor     w14,w14,w7,lsr#10       // sigma1(X[i+14])
+       add     w9,w9,w2
+       add     w26,w26,w22                     // d+=h
+       add     w22,w22,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w9,w9,w15
+       add     w22,w22,w17                     // h+=Sigma0(a)
+       add     w9,w9,w14
+       ldr     w14,[sp,#12]
+       str     w1,[sp,#8]
+       ror     w16,w26,#6
+       add     w21,w21,w19                     // h+=K[i]
+       ror     w0,w11,#7
+       and     w17,w27,w26
+       ror     w15,w8,#17
+       bic     w19,w20,w26
+       ror     w1,w22,#2
+       add     w21,w21,w9                      // h+=X[i]
+       eor     w16,w16,w26,ror#11
+       eor     w0,w0,w11,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w22,w23                     // a^b, b^c in next round
+       eor     w16,w16,w26,ror#25      // Sigma1(e)
+       eor     w1,w1,w22,ror#13
+       add     w21,w21,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w15,w15,w8,ror#19
+       eor     w0,w0,w11,lsr#3 // sigma0(X[i+1])
+       add     w21,w21,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w23                     // Maj(a,b,c)
+       eor     w17,w1,w22,ror#22       // Sigma0(a)
+       eor     w15,w15,w8,lsr#10       // sigma1(X[i+14])
+       add     w10,w10,w3
+       add     w25,w25,w21                     // d+=h
+       add     w21,w21,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w10,w10,w0
+       add     w21,w21,w17                     // h+=Sigma0(a)
+       add     w10,w10,w15
+       ldr     w15,[sp,#0]
+       str     w2,[sp,#12]
+       ror     w16,w25,#6
+       add     w20,w20,w28                     // h+=K[i]
+       ror     w1,w12,#7
+       and     w17,w26,w25
+       ror     w0,w9,#17
+       bic     w28,w27,w25
+       ror     w2,w21,#2
+       add     w20,w20,w10                     // h+=X[i]
+       eor     w16,w16,w25,ror#11
+       eor     w1,w1,w12,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w21,w22                     // a^b, b^c in next round
+       eor     w16,w16,w25,ror#25      // Sigma1(e)
+       eor     w2,w2,w21,ror#13
+       add     w20,w20,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w0,w0,w9,ror#19
+       eor     w1,w1,w12,lsr#3 // sigma0(X[i+1])
+       add     w20,w20,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w22                     // Maj(a,b,c)
+       eor     w17,w2,w21,ror#22       // Sigma0(a)
+       eor     w0,w0,w9,lsr#10 // sigma1(X[i+14])
+       add     w11,w11,w4
+       add     w24,w24,w20                     // d+=h
+       add     w20,w20,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w11,w11,w1
+       add     w20,w20,w17                     // h+=Sigma0(a)
+       add     w11,w11,w0
+       ldr     w0,[sp,#4]
+       str     w3,[sp,#0]
+       ror     w16,w24,#6
+       add     w27,w27,w19                     // h+=K[i]
+       ror     w2,w13,#7
+       and     w17,w25,w24
+       ror     w1,w10,#17
+       bic     w19,w26,w24
+       ror     w3,w20,#2
+       add     w27,w27,w11                     // h+=X[i]
+       eor     w16,w16,w24,ror#11
+       eor     w2,w2,w13,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w20,w21                     // a^b, b^c in next round
+       eor     w16,w16,w24,ror#25      // Sigma1(e)
+       eor     w3,w3,w20,ror#13
+       add     w27,w27,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w1,w1,w10,ror#19
+       eor     w2,w2,w13,lsr#3 // sigma0(X[i+1])
+       add     w27,w27,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w21                     // Maj(a,b,c)
+       eor     w17,w3,w20,ror#22       // Sigma0(a)
+       eor     w1,w1,w10,lsr#10        // sigma1(X[i+14])
+       add     w12,w12,w5
+       add     w23,w23,w27                     // d+=h
+       add     w27,w27,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w12,w12,w2
+       add     w27,w27,w17                     // h+=Sigma0(a)
+       add     w12,w12,w1
+       ldr     w1,[sp,#8]
+       str     w4,[sp,#4]
+       ror     w16,w23,#6
+       add     w26,w26,w28                     // h+=K[i]
+       ror     w3,w14,#7
+       and     w17,w24,w23
+       ror     w2,w11,#17
+       bic     w28,w25,w23
+       ror     w4,w27,#2
+       add     w26,w26,w12                     // h+=X[i]
+       eor     w16,w16,w23,ror#11
+       eor     w3,w3,w14,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w27,w20                     // a^b, b^c in next round
+       eor     w16,w16,w23,ror#25      // Sigma1(e)
+       eor     w4,w4,w27,ror#13
+       add     w26,w26,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w2,w2,w11,ror#19
+       eor     w3,w3,w14,lsr#3 // sigma0(X[i+1])
+       add     w26,w26,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w20                     // Maj(a,b,c)
+       eor     w17,w4,w27,ror#22       // Sigma0(a)
+       eor     w2,w2,w11,lsr#10        // sigma1(X[i+14])
+       add     w13,w13,w6
+       add     w22,w22,w26                     // d+=h
+       add     w26,w26,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w13,w13,w3
+       add     w26,w26,w17                     // h+=Sigma0(a)
+       add     w13,w13,w2
+       ldr     w2,[sp,#12]
+       str     w5,[sp,#8]
+       ror     w16,w22,#6
+       add     w25,w25,w19                     // h+=K[i]
+       ror     w4,w15,#7
+       and     w17,w23,w22
+       ror     w3,w12,#17
+       bic     w19,w24,w22
+       ror     w5,w26,#2
+       add     w25,w25,w13                     // h+=X[i]
+       eor     w16,w16,w22,ror#11
+       eor     w4,w4,w15,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w26,w27                     // a^b, b^c in next round
+       eor     w16,w16,w22,ror#25      // Sigma1(e)
+       eor     w5,w5,w26,ror#13
+       add     w25,w25,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w3,w3,w12,ror#19
+       eor     w4,w4,w15,lsr#3 // sigma0(X[i+1])
+       add     w25,w25,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w27                     // Maj(a,b,c)
+       eor     w17,w5,w26,ror#22       // Sigma0(a)
+       eor     w3,w3,w12,lsr#10        // sigma1(X[i+14])
+       add     w14,w14,w7
+       add     w21,w21,w25                     // d+=h
+       add     w25,w25,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w14,w14,w4
+       add     w25,w25,w17                     // h+=Sigma0(a)
+       add     w14,w14,w3
+       ldr     w3,[sp,#0]
+       str     w6,[sp,#12]
+       ror     w16,w21,#6
+       add     w24,w24,w28                     // h+=K[i]
+       ror     w5,w0,#7
+       and     w17,w22,w21
+       ror     w4,w13,#17
+       bic     w28,w23,w21
+       ror     w6,w25,#2
+       add     w24,w24,w14                     // h+=X[i]
+       eor     w16,w16,w21,ror#11
+       eor     w5,w5,w0,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w25,w26                     // a^b, b^c in next round
+       eor     w16,w16,w21,ror#25      // Sigma1(e)
+       eor     w6,w6,w25,ror#13
+       add     w24,w24,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w4,w4,w13,ror#19
+       eor     w5,w5,w0,lsr#3  // sigma0(X[i+1])
+       add     w24,w24,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w26                     // Maj(a,b,c)
+       eor     w17,w6,w25,ror#22       // Sigma0(a)
+       eor     w4,w4,w13,lsr#10        // sigma1(X[i+14])
+       add     w15,w15,w8
+       add     w20,w20,w24                     // d+=h
+       add     w24,w24,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w15,w15,w5
+       add     w24,w24,w17                     // h+=Sigma0(a)
+       add     w15,w15,w4
+       ldr     w4,[sp,#4]
+       str     w7,[sp,#0]
+       ror     w16,w20,#6
+       add     w23,w23,w19                     // h+=K[i]
+       ror     w6,w1,#7
+       and     w17,w21,w20
+       ror     w5,w14,#17
+       bic     w19,w22,w20
+       ror     w7,w24,#2
+       add     w23,w23,w15                     // h+=X[i]
+       eor     w16,w16,w20,ror#11
+       eor     w6,w6,w1,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w24,w25                     // a^b, b^c in next round
+       eor     w16,w16,w20,ror#25      // Sigma1(e)
+       eor     w7,w7,w24,ror#13
+       add     w23,w23,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w5,w5,w14,ror#19
+       eor     w6,w6,w1,lsr#3  // sigma0(X[i+1])
+       add     w23,w23,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w25                     // Maj(a,b,c)
+       eor     w17,w7,w24,ror#22       // Sigma0(a)
+       eor     w5,w5,w14,lsr#10        // sigma1(X[i+14])
+       add     w0,w0,w9
+       add     w27,w27,w23                     // d+=h
+       add     w23,w23,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w0,w0,w6
+       add     w23,w23,w17                     // h+=Sigma0(a)
+       add     w0,w0,w5
+       ldr     w5,[sp,#8]
+       str     w8,[sp,#4]
+       ror     w16,w27,#6
+       add     w22,w22,w28                     // h+=K[i]
+       ror     w7,w2,#7
+       and     w17,w20,w27
+       ror     w6,w15,#17
+       bic     w28,w21,w27
+       ror     w8,w23,#2
+       add     w22,w22,w0                      // h+=X[i]
+       eor     w16,w16,w27,ror#11
+       eor     w7,w7,w2,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w23,w24                     // a^b, b^c in next round
+       eor     w16,w16,w27,ror#25      // Sigma1(e)
+       eor     w8,w8,w23,ror#13
+       add     w22,w22,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w6,w6,w15,ror#19
+       eor     w7,w7,w2,lsr#3  // sigma0(X[i+1])
+       add     w22,w22,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w24                     // Maj(a,b,c)
+       eor     w17,w8,w23,ror#22       // Sigma0(a)
+       eor     w6,w6,w15,lsr#10        // sigma1(X[i+14])
+       add     w1,w1,w10
+       add     w26,w26,w22                     // d+=h
+       add     w22,w22,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w1,w1,w7
+       add     w22,w22,w17                     // h+=Sigma0(a)
+       add     w1,w1,w6
+       ldr     w6,[sp,#12]
+       str     w9,[sp,#8]
+       ror     w16,w26,#6
+       add     w21,w21,w19                     // h+=K[i]
+       ror     w8,w3,#7
+       and     w17,w27,w26
+       ror     w7,w0,#17
+       bic     w19,w20,w26
+       ror     w9,w22,#2
+       add     w21,w21,w1                      // h+=X[i]
+       eor     w16,w16,w26,ror#11
+       eor     w8,w8,w3,ror#18
+       orr     w17,w17,w19                     // Ch(e,f,g)
+       eor     w19,w22,w23                     // a^b, b^c in next round
+       eor     w16,w16,w26,ror#25      // Sigma1(e)
+       eor     w9,w9,w22,ror#13
+       add     w21,w21,w17                     // h+=Ch(e,f,g)
+       and     w28,w28,w19                     // (b^c)&=(a^b)
+       eor     w7,w7,w0,ror#19
+       eor     w8,w8,w3,lsr#3  // sigma0(X[i+1])
+       add     w21,w21,w16                     // h+=Sigma1(e)
+       eor     w28,w28,w23                     // Maj(a,b,c)
+       eor     w17,w9,w22,ror#22       // Sigma0(a)
+       eor     w7,w7,w0,lsr#10 // sigma1(X[i+14])
+       add     w2,w2,w11
+       add     w25,w25,w21                     // d+=h
+       add     w21,w21,w28                     // h+=Maj(a,b,c)
+       ldr     w28,[x30],#4            // *K++, w19 in next round
+       add     w2,w2,w8
+       add     w21,w21,w17                     // h+=Sigma0(a)
+       add     w2,w2,w7
+       ldr     w7,[sp,#0]
+       str     w10,[sp,#12]
+       ror     w16,w25,#6
+       add     w20,w20,w28                     // h+=K[i]
+       ror     w9,w4,#7
+       and     w17,w26,w25
+       ror     w8,w1,#17
+       bic     w28,w27,w25
+       ror     w10,w21,#2
+       add     w20,w20,w2                      // h+=X[i]
+       eor     w16,w16,w25,ror#11
+       eor     w9,w9,w4,ror#18
+       orr     w17,w17,w28                     // Ch(e,f,g)
+       eor     w28,w21,w22                     // a^b, b^c in next round
+       eor     w16,w16,w25,ror#25      // Sigma1(e)
+       eor     w10,w10,w21,ror#13
+       add     w20,w20,w17                     // h+=Ch(e,f,g)
+       and     w19,w19,w28                     // (b^c)&=(a^b)
+       eor     w8,w8,w1,ror#19
+       eor     w9,w9,w4,lsr#3  // sigma0(X[i+1])
+       add     w20,w20,w16                     // h+=Sigma1(e)
+       eor     w19,w19,w22                     // Maj(a,b,c)
+       eor     w17,w10,w21,ror#22      // Sigma0(a)
+       eor     w8,w8,w1,lsr#10 // sigma1(X[i+14])
+       add     w3,w3,w12
+       add     w24,w24,w20                     // d+=h
+       add     w20,w20,w19                     // h+=Maj(a,b,c)
+       ldr     w19,[x30],#4            // *K++, w28 in next round
+       add     w3,w3,w9
+       add     w20,w20,w17                     // h+=Sigma0(a)
+       add     w3,w3,w8
+       cbnz    w19,.Loop_16_xx
+
+       ldp     x0,x2,[x29,#96]
+       ldr     x1,[x29,#112]
+       sub     x30,x30,#260            // rewind
+
+       ldp     w3,w4,[x0]
+       ldp     w5,w6,[x0,#2*4]
+       add     x1,x1,#14*4                     // advance input pointer
+       ldp     w7,w8,[x0,#4*4]
+       add     w20,w20,w3
+       ldp     w9,w10,[x0,#6*4]
+       add     w21,w21,w4
+       add     w22,w22,w5
+       add     w23,w23,w6
+       stp     w20,w21,[x0]
+       add     w24,w24,w7
+       add     w25,w25,w8
+       stp     w22,w23,[x0,#2*4]
+       add     w26,w26,w9
+       add     w27,w27,w10
+       cmp     x1,x2
+       stp     w24,w25,[x0,#4*4]
+       stp     w26,w27,[x0,#6*4]
+       b.ne    .Loop
+
+       ldp     x19,x20,[x29,#16]
+       add     sp,sp,#4*4
+       ldp     x21,x22,[x29,#32]
+       ldp     x23,x24,[x29,#48]
+       ldp     x25,x26,[x29,#64]
+       ldp     x27,x28,[x29,#80]
+       ldp     x29,x30,[sp],#128
+       ret
+.size  sha256_block_data_order,.-sha256_block_data_order
+
+.align 6
+.type  .LK256,%object
+.LK256:
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+       .long   0       //terminator
+.size  .LK256,.-.LK256
+#ifndef        __KERNEL__
+.align 3
+.LOPENSSL_armcap_P:
+# ifdef        __ILP32__
+       .long   OPENSSL_armcap_P-.
+# else
+       .quad   OPENSSL_armcap_P-.
+# endif
+#endif
+.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#ifndef        __KERNEL__
+.type  sha256_block_armv8,%function
+.align 6
+sha256_block_armv8:
+.Lv8_entry:
+       stp             x29,x30,[sp,#-16]!
+       add             x29,sp,#0
+
+       ld1             {v0.4s,v1.4s},[x0]
+       adr             x3,.LK256
+
+.Loop_hw:
+       ld1             {v4.16b-v7.16b},[x1],#64
+       sub             x2,x2,#1
+       ld1             {v16.4s},[x3],#16
+       rev32           v4.16b,v4.16b
+       rev32           v5.16b,v5.16b
+       rev32           v6.16b,v6.16b
+       rev32           v7.16b,v7.16b
+       orr             v18.16b,v0.16b,v0.16b           // offload
+       orr             v19.16b,v1.16b,v1.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v4.4s
+       .inst   0x5e2828a4      //sha256su0 v4.16b,v5.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+       .inst   0x5e0760c4      //sha256su1 v4.16b,v6.16b,v7.16b
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v5.4s
+       .inst   0x5e2828c5      //sha256su0 v5.16b,v6.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+       .inst   0x5e0460e5      //sha256su1 v5.16b,v7.16b,v4.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v6.4s
+       .inst   0x5e2828e6      //sha256su0 v6.16b,v7.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+       .inst   0x5e056086      //sha256su1 v6.16b,v4.16b,v5.16b
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v7.4s
+       .inst   0x5e282887      //sha256su0 v7.16b,v4.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+       .inst   0x5e0660a7      //sha256su1 v7.16b,v5.16b,v6.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v4.4s
+       .inst   0x5e2828a4      //sha256su0 v4.16b,v5.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+       .inst   0x5e0760c4      //sha256su1 v4.16b,v6.16b,v7.16b
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v5.4s
+       .inst   0x5e2828c5      //sha256su0 v5.16b,v6.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+       .inst   0x5e0460e5      //sha256su1 v5.16b,v7.16b,v4.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v6.4s
+       .inst   0x5e2828e6      //sha256su0 v6.16b,v7.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+       .inst   0x5e056086      //sha256su1 v6.16b,v4.16b,v5.16b
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v7.4s
+       .inst   0x5e282887      //sha256su0 v7.16b,v4.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+       .inst   0x5e0660a7      //sha256su1 v7.16b,v5.16b,v6.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v4.4s
+       .inst   0x5e2828a4      //sha256su0 v4.16b,v5.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+       .inst   0x5e0760c4      //sha256su1 v4.16b,v6.16b,v7.16b
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v5.4s
+       .inst   0x5e2828c5      //sha256su0 v5.16b,v6.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+       .inst   0x5e0460e5      //sha256su1 v5.16b,v7.16b,v4.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v6.4s
+       .inst   0x5e2828e6      //sha256su0 v6.16b,v7.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+       .inst   0x5e056086      //sha256su1 v6.16b,v4.16b,v5.16b
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v7.4s
+       .inst   0x5e282887      //sha256su0 v7.16b,v4.16b
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+       .inst   0x5e0660a7      //sha256su1 v7.16b,v5.16b,v6.16b
+       ld1             {v17.4s},[x3],#16
+       add             v16.4s,v16.4s,v4.4s
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+
+       ld1             {v16.4s},[x3],#16
+       add             v17.4s,v17.4s,v5.4s
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+
+       ld1             {v17.4s},[x3]
+       add             v16.4s,v16.4s,v6.4s
+       sub             x3,x3,#64*4-16  // rewind
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e104020      //sha256h v0.16b,v1.16b,v16.4s
+       .inst   0x5e105041      //sha256h2 v1.16b,v2.16b,v16.4s
+
+       add             v17.4s,v17.4s,v7.4s
+       orr             v2.16b,v0.16b,v0.16b
+       .inst   0x5e114020      //sha256h v0.16b,v1.16b,v17.4s
+       .inst   0x5e115041      //sha256h2 v1.16b,v2.16b,v17.4s
+
+       add             v0.4s,v0.4s,v18.4s
+       add             v1.4s,v1.4s,v19.4s
+
+       cbnz            x2,.Loop_hw
+
+       st1             {v0.4s,v1.4s},[x0]
+
+       ldr             x29,[sp],#16
+       ret
+.size  sha256_block_armv8,.-sha256_block_armv8
+#endif
+#ifdef __KERNEL__
+.globl sha256_block_neon
+#endif
+.type  sha256_block_neon,%function
+.align 4
+sha256_block_neon:
+.Lneon_entry:
+       stp     x29, x30, [sp, #-16]!
+       mov     x29, sp
+       sub     sp,sp,#16*4
+
+       adr     x16,.LK256
+       add     x2,x1,x2,lsl#6  // len to point at the end of inp
+
+       ld1     {v0.16b},[x1], #16
+       ld1     {v1.16b},[x1], #16
+       ld1     {v2.16b},[x1], #16
+       ld1     {v3.16b},[x1], #16
+       ld1     {v4.4s},[x16], #16
+       ld1     {v5.4s},[x16], #16
+       ld1     {v6.4s},[x16], #16
+       ld1     {v7.4s},[x16], #16
+       rev32   v0.16b,v0.16b           // yes, even on
+       rev32   v1.16b,v1.16b           // big-endian
+       rev32   v2.16b,v2.16b
+       rev32   v3.16b,v3.16b
+       mov     x17,sp
+       add     v4.4s,v4.4s,v0.4s
+       add     v5.4s,v5.4s,v1.4s
+       add     v6.4s,v6.4s,v2.4s
+       st1     {v4.4s-v5.4s},[x17], #32
+       add     v7.4s,v7.4s,v3.4s
+       st1     {v6.4s-v7.4s},[x17]
+       sub     x17,x17,#32
+
+       ldp     w3,w4,[x0]
+       ldp     w5,w6,[x0,#8]
+       ldp     w7,w8,[x0,#16]
+       ldp     w9,w10,[x0,#24]
+       ldr     w12,[sp,#0]
+       mov     w13,wzr
+       eor     w14,w4,w5
+       mov     w15,wzr
+       b       .L_00_48
+
+.align 4
+.L_00_48:
+       ext     v4.16b,v0.16b,v1.16b,#4
+       add     w10,w10,w12
+       add     w3,w3,w15
+       and     w12,w8,w7
+       bic     w15,w9,w7
+       ext     v7.16b,v2.16b,v3.16b,#4
+       eor     w11,w7,w7,ror#5
+       add     w3,w3,w13
+       mov     d19,v3.d[1]
+       orr     w12,w12,w15
+       eor     w11,w11,w7,ror#19
+       ushr    v6.4s,v4.4s,#7
+       eor     w15,w3,w3,ror#11
+       ushr    v5.4s,v4.4s,#3
+       add     w10,w10,w12
+       add     v0.4s,v0.4s,v7.4s
+       ror     w11,w11,#6
+       sli     v6.4s,v4.4s,#25
+       eor     w13,w3,w4
+       eor     w15,w15,w3,ror#20
+       ushr    v7.4s,v4.4s,#18
+       add     w10,w10,w11
+       ldr     w12,[sp,#4]
+       and     w14,w14,w13
+       eor     v5.16b,v5.16b,v6.16b
+       ror     w15,w15,#2
+       add     w6,w6,w10
+       sli     v7.4s,v4.4s,#14
+       eor     w14,w14,w4
+       ushr    v16.4s,v19.4s,#17
+       add     w9,w9,w12
+       add     w10,w10,w15
+       and     w12,w7,w6
+       eor     v5.16b,v5.16b,v7.16b
+       bic     w15,w8,w6
+       eor     w11,w6,w6,ror#5
+       sli     v16.4s,v19.4s,#15
+       add     w10,w10,w14
+       orr     w12,w12,w15
+       ushr    v17.4s,v19.4s,#10
+       eor     w11,w11,w6,ror#19
+       eor     w15,w10,w10,ror#11
+       ushr    v7.4s,v19.4s,#19
+       add     w9,w9,w12
+       ror     w11,w11,#6
+       add     v0.4s,v0.4s,v5.4s
+       eor     w14,w10,w3
+       eor     w15,w15,w10,ror#20
+       sli     v7.4s,v19.4s,#13
+       add     w9,w9,w11
+       ldr     w12,[sp,#8]
+       and     w13,w13,w14
+       eor     v17.16b,v17.16b,v16.16b
+       ror     w15,w15,#2
+       add     w5,w5,w9
+       eor     w13,w13,w3
+       eor     v17.16b,v17.16b,v7.16b
+       add     w8,w8,w12
+       add     w9,w9,w15
+       and     w12,w6,w5
+       add     v0.4s,v0.4s,v17.4s
+       bic     w15,w7,w5
+       eor     w11,w5,w5,ror#5
+       add     w9,w9,w13
+       ushr    v18.4s,v0.4s,#17
+       orr     w12,w12,w15
+       ushr    v19.4s,v0.4s,#10
+       eor     w11,w11,w5,ror#19
+       eor     w15,w9,w9,ror#11
+       sli     v18.4s,v0.4s,#15
+       add     w8,w8,w12
+       ushr    v17.4s,v0.4s,#19
+       ror     w11,w11,#6
+       eor     w13,w9,w10
+       eor     v19.16b,v19.16b,v18.16b
+       eor     w15,w15,w9,ror#20
+       add     w8,w8,w11
+       sli     v17.4s,v0.4s,#13
+       ldr     w12,[sp,#12]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       ld1     {v4.4s},[x16], #16
+       add     w4,w4,w8
+       eor     v19.16b,v19.16b,v17.16b
+       eor     w14,w14,w10
+       eor     v17.16b,v17.16b,v17.16b
+       add     w7,w7,w12
+       add     w8,w8,w15
+       and     w12,w5,w4
+       mov     v17.d[1],v19.d[0]
+       bic     w15,w6,w4
+       eor     w11,w4,w4,ror#5
+       add     w8,w8,w14
+       add     v0.4s,v0.4s,v17.4s
+       orr     w12,w12,w15
+       eor     w11,w11,w4,ror#19
+       eor     w15,w8,w8,ror#11
+       add     v4.4s,v4.4s,v0.4s
+       add     w7,w7,w12
+       ror     w11,w11,#6
+       eor     w14,w8,w9
+       eor     w15,w15,w8,ror#20
+       add     w7,w7,w11
+       ldr     w12,[sp,#16]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w3,w3,w7
+       eor     w13,w13,w9
+       st1     {v4.4s},[x17], #16
+       ext     v4.16b,v1.16b,v2.16b,#4
+       add     w6,w6,w12
+       add     w7,w7,w15
+       and     w12,w4,w3
+       bic     w15,w5,w3
+       ext     v7.16b,v3.16b,v0.16b,#4
+       eor     w11,w3,w3,ror#5
+       add     w7,w7,w13
+       mov     d19,v0.d[1]
+       orr     w12,w12,w15
+       eor     w11,w11,w3,ror#19
+       ushr    v6.4s,v4.4s,#7
+       eor     w15,w7,w7,ror#11
+       ushr    v5.4s,v4.4s,#3
+       add     w6,w6,w12
+       add     v1.4s,v1.4s,v7.4s
+       ror     w11,w11,#6
+       sli     v6.4s,v4.4s,#25
+       eor     w13,w7,w8
+       eor     w15,w15,w7,ror#20
+       ushr    v7.4s,v4.4s,#18
+       add     w6,w6,w11
+       ldr     w12,[sp,#20]
+       and     w14,w14,w13
+       eor     v5.16b,v5.16b,v6.16b
+       ror     w15,w15,#2
+       add     w10,w10,w6
+       sli     v7.4s,v4.4s,#14
+       eor     w14,w14,w8
+       ushr    v16.4s,v19.4s,#17
+       add     w5,w5,w12
+       add     w6,w6,w15
+       and     w12,w3,w10
+       eor     v5.16b,v5.16b,v7.16b
+       bic     w15,w4,w10
+       eor     w11,w10,w10,ror#5
+       sli     v16.4s,v19.4s,#15
+       add     w6,w6,w14
+       orr     w12,w12,w15
+       ushr    v17.4s,v19.4s,#10
+       eor     w11,w11,w10,ror#19
+       eor     w15,w6,w6,ror#11
+       ushr    v7.4s,v19.4s,#19
+       add     w5,w5,w12
+       ror     w11,w11,#6
+       add     v1.4s,v1.4s,v5.4s
+       eor     w14,w6,w7
+       eor     w15,w15,w6,ror#20
+       sli     v7.4s,v19.4s,#13
+       add     w5,w5,w11
+       ldr     w12,[sp,#24]
+       and     w13,w13,w14
+       eor     v17.16b,v17.16b,v16.16b
+       ror     w15,w15,#2
+       add     w9,w9,w5
+       eor     w13,w13,w7
+       eor     v17.16b,v17.16b,v7.16b
+       add     w4,w4,w12
+       add     w5,w5,w15
+       and     w12,w10,w9
+       add     v1.4s,v1.4s,v17.4s
+       bic     w15,w3,w9
+       eor     w11,w9,w9,ror#5
+       add     w5,w5,w13
+       ushr    v18.4s,v1.4s,#17
+       orr     w12,w12,w15
+       ushr    v19.4s,v1.4s,#10
+       eor     w11,w11,w9,ror#19
+       eor     w15,w5,w5,ror#11
+       sli     v18.4s,v1.4s,#15
+       add     w4,w4,w12
+       ushr    v17.4s,v1.4s,#19
+       ror     w11,w11,#6
+       eor     w13,w5,w6
+       eor     v19.16b,v19.16b,v18.16b
+       eor     w15,w15,w5,ror#20
+       add     w4,w4,w11
+       sli     v17.4s,v1.4s,#13
+       ldr     w12,[sp,#28]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       ld1     {v4.4s},[x16], #16
+       add     w8,w8,w4
+       eor     v19.16b,v19.16b,v17.16b
+       eor     w14,w14,w6
+       eor     v17.16b,v17.16b,v17.16b
+       add     w3,w3,w12
+       add     w4,w4,w15
+       and     w12,w9,w8
+       mov     v17.d[1],v19.d[0]
+       bic     w15,w10,w8
+       eor     w11,w8,w8,ror#5
+       add     w4,w4,w14
+       add     v1.4s,v1.4s,v17.4s
+       orr     w12,w12,w15
+       eor     w11,w11,w8,ror#19
+       eor     w15,w4,w4,ror#11
+       add     v4.4s,v4.4s,v1.4s
+       add     w3,w3,w12
+       ror     w11,w11,#6
+       eor     w14,w4,w5
+       eor     w15,w15,w4,ror#20
+       add     w3,w3,w11
+       ldr     w12,[sp,#32]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w7,w7,w3
+       eor     w13,w13,w5
+       st1     {v4.4s},[x17], #16
+       ext     v4.16b,v2.16b,v3.16b,#4
+       add     w10,w10,w12
+       add     w3,w3,w15
+       and     w12,w8,w7
+       bic     w15,w9,w7
+       ext     v7.16b,v0.16b,v1.16b,#4
+       eor     w11,w7,w7,ror#5
+       add     w3,w3,w13
+       mov     d19,v1.d[1]
+       orr     w12,w12,w15
+       eor     w11,w11,w7,ror#19
+       ushr    v6.4s,v4.4s,#7
+       eor     w15,w3,w3,ror#11
+       ushr    v5.4s,v4.4s,#3
+       add     w10,w10,w12
+       add     v2.4s,v2.4s,v7.4s
+       ror     w11,w11,#6
+       sli     v6.4s,v4.4s,#25
+       eor     w13,w3,w4
+       eor     w15,w15,w3,ror#20
+       ushr    v7.4s,v4.4s,#18
+       add     w10,w10,w11
+       ldr     w12,[sp,#36]
+       and     w14,w14,w13
+       eor     v5.16b,v5.16b,v6.16b
+       ror     w15,w15,#2
+       add     w6,w6,w10
+       sli     v7.4s,v4.4s,#14
+       eor     w14,w14,w4
+       ushr    v16.4s,v19.4s,#17
+       add     w9,w9,w12
+       add     w10,w10,w15
+       and     w12,w7,w6
+       eor     v5.16b,v5.16b,v7.16b
+       bic     w15,w8,w6
+       eor     w11,w6,w6,ror#5
+       sli     v16.4s,v19.4s,#15
+       add     w10,w10,w14
+       orr     w12,w12,w15
+       ushr    v17.4s,v19.4s,#10
+       eor     w11,w11,w6,ror#19
+       eor     w15,w10,w10,ror#11
+       ushr    v7.4s,v19.4s,#19
+       add     w9,w9,w12
+       ror     w11,w11,#6
+       add     v2.4s,v2.4s,v5.4s
+       eor     w14,w10,w3
+       eor     w15,w15,w10,ror#20
+       sli     v7.4s,v19.4s,#13
+       add     w9,w9,w11
+       ldr     w12,[sp,#40]
+       and     w13,w13,w14
+       eor     v17.16b,v17.16b,v16.16b
+       ror     w15,w15,#2
+       add     w5,w5,w9
+       eor     w13,w13,w3
+       eor     v17.16b,v17.16b,v7.16b
+       add     w8,w8,w12
+       add     w9,w9,w15
+       and     w12,w6,w5
+       add     v2.4s,v2.4s,v17.4s
+       bic     w15,w7,w5
+       eor     w11,w5,w5,ror#5
+       add     w9,w9,w13
+       ushr    v18.4s,v2.4s,#17
+       orr     w12,w12,w15
+       ushr    v19.4s,v2.4s,#10
+       eor     w11,w11,w5,ror#19
+       eor     w15,w9,w9,ror#11
+       sli     v18.4s,v2.4s,#15
+       add     w8,w8,w12
+       ushr    v17.4s,v2.4s,#19
+       ror     w11,w11,#6
+       eor     w13,w9,w10
+       eor     v19.16b,v19.16b,v18.16b
+       eor     w15,w15,w9,ror#20
+       add     w8,w8,w11
+       sli     v17.4s,v2.4s,#13
+       ldr     w12,[sp,#44]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       ld1     {v4.4s},[x16], #16
+       add     w4,w4,w8
+       eor     v19.16b,v19.16b,v17.16b
+       eor     w14,w14,w10
+       eor     v17.16b,v17.16b,v17.16b
+       add     w7,w7,w12
+       add     w8,w8,w15
+       and     w12,w5,w4
+       mov     v17.d[1],v19.d[0]
+       bic     w15,w6,w4
+       eor     w11,w4,w4,ror#5
+       add     w8,w8,w14
+       add     v2.4s,v2.4s,v17.4s
+       orr     w12,w12,w15
+       eor     w11,w11,w4,ror#19
+       eor     w15,w8,w8,ror#11
+       add     v4.4s,v4.4s,v2.4s
+       add     w7,w7,w12
+       ror     w11,w11,#6
+       eor     w14,w8,w9
+       eor     w15,w15,w8,ror#20
+       add     w7,w7,w11
+       ldr     w12,[sp,#48]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w3,w3,w7
+       eor     w13,w13,w9
+       st1     {v4.4s},[x17], #16
+       ext     v4.16b,v3.16b,v0.16b,#4
+       add     w6,w6,w12
+       add     w7,w7,w15
+       and     w12,w4,w3
+       bic     w15,w5,w3
+       ext     v7.16b,v1.16b,v2.16b,#4
+       eor     w11,w3,w3,ror#5
+       add     w7,w7,w13
+       mov     d19,v2.d[1]
+       orr     w12,w12,w15
+       eor     w11,w11,w3,ror#19
+       ushr    v6.4s,v4.4s,#7
+       eor     w15,w7,w7,ror#11
+       ushr    v5.4s,v4.4s,#3
+       add     w6,w6,w12
+       add     v3.4s,v3.4s,v7.4s
+       ror     w11,w11,#6
+       sli     v6.4s,v4.4s,#25
+       eor     w13,w7,w8
+       eor     w15,w15,w7,ror#20
+       ushr    v7.4s,v4.4s,#18
+       add     w6,w6,w11
+       ldr     w12,[sp,#52]
+       and     w14,w14,w13
+       eor     v5.16b,v5.16b,v6.16b
+       ror     w15,w15,#2
+       add     w10,w10,w6
+       sli     v7.4s,v4.4s,#14
+       eor     w14,w14,w8
+       ushr    v16.4s,v19.4s,#17
+       add     w5,w5,w12
+       add     w6,w6,w15
+       and     w12,w3,w10
+       eor     v5.16b,v5.16b,v7.16b
+       bic     w15,w4,w10
+       eor     w11,w10,w10,ror#5
+       sli     v16.4s,v19.4s,#15
+       add     w6,w6,w14
+       orr     w12,w12,w15
+       ushr    v17.4s,v19.4s,#10
+       eor     w11,w11,w10,ror#19
+       eor     w15,w6,w6,ror#11
+       ushr    v7.4s,v19.4s,#19
+       add     w5,w5,w12
+       ror     w11,w11,#6
+       add     v3.4s,v3.4s,v5.4s
+       eor     w14,w6,w7
+       eor     w15,w15,w6,ror#20
+       sli     v7.4s,v19.4s,#13
+       add     w5,w5,w11
+       ldr     w12,[sp,#56]
+       and     w13,w13,w14
+       eor     v17.16b,v17.16b,v16.16b
+       ror     w15,w15,#2
+       add     w9,w9,w5
+       eor     w13,w13,w7
+       eor     v17.16b,v17.16b,v7.16b
+       add     w4,w4,w12
+       add     w5,w5,w15
+       and     w12,w10,w9
+       add     v3.4s,v3.4s,v17.4s
+       bic     w15,w3,w9
+       eor     w11,w9,w9,ror#5
+       add     w5,w5,w13
+       ushr    v18.4s,v3.4s,#17
+       orr     w12,w12,w15
+       ushr    v19.4s,v3.4s,#10
+       eor     w11,w11,w9,ror#19
+       eor     w15,w5,w5,ror#11
+       sli     v18.4s,v3.4s,#15
+       add     w4,w4,w12
+       ushr    v17.4s,v3.4s,#19
+       ror     w11,w11,#6
+       eor     w13,w5,w6
+       eor     v19.16b,v19.16b,v18.16b
+       eor     w15,w15,w5,ror#20
+       add     w4,w4,w11
+       sli     v17.4s,v3.4s,#13
+       ldr     w12,[sp,#60]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       ld1     {v4.4s},[x16], #16
+       add     w8,w8,w4
+       eor     v19.16b,v19.16b,v17.16b
+       eor     w14,w14,w6
+       eor     v17.16b,v17.16b,v17.16b
+       add     w3,w3,w12
+       add     w4,w4,w15
+       and     w12,w9,w8
+       mov     v17.d[1],v19.d[0]
+       bic     w15,w10,w8
+       eor     w11,w8,w8,ror#5
+       add     w4,w4,w14
+       add     v3.4s,v3.4s,v17.4s
+       orr     w12,w12,w15
+       eor     w11,w11,w8,ror#19
+       eor     w15,w4,w4,ror#11
+       add     v4.4s,v4.4s,v3.4s
+       add     w3,w3,w12
+       ror     w11,w11,#6
+       eor     w14,w4,w5
+       eor     w15,w15,w4,ror#20
+       add     w3,w3,w11
+       ldr     w12,[x16]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w7,w7,w3
+       eor     w13,w13,w5
+       st1     {v4.4s},[x17], #16
+       cmp     w12,#0                          // check for K256 terminator
+       ldr     w12,[sp,#0]
+       sub     x17,x17,#64
+       bne     .L_00_48
+
+       sub     x16,x16,#256            // rewind x16
+       cmp     x1,x2
+       mov     x17, #64
+       csel    x17, x17, xzr, eq
+       sub     x1,x1,x17                       // avoid SEGV
+       mov     x17,sp
+       add     w10,w10,w12
+       add     w3,w3,w15
+       and     w12,w8,w7
+       ld1     {v0.16b},[x1],#16
+       bic     w15,w9,w7
+       eor     w11,w7,w7,ror#5
+       ld1     {v4.4s},[x16],#16
+       add     w3,w3,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w7,ror#19
+       eor     w15,w3,w3,ror#11
+       rev32   v0.16b,v0.16b
+       add     w10,w10,w12
+       ror     w11,w11,#6
+       eor     w13,w3,w4
+       eor     w15,w15,w3,ror#20
+       add     v4.4s,v4.4s,v0.4s
+       add     w10,w10,w11
+       ldr     w12,[sp,#4]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w6,w6,w10
+       eor     w14,w14,w4
+       add     w9,w9,w12
+       add     w10,w10,w15
+       and     w12,w7,w6
+       bic     w15,w8,w6
+       eor     w11,w6,w6,ror#5
+       add     w10,w10,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w6,ror#19
+       eor     w15,w10,w10,ror#11
+       add     w9,w9,w12
+       ror     w11,w11,#6
+       eor     w14,w10,w3
+       eor     w15,w15,w10,ror#20
+       add     w9,w9,w11
+       ldr     w12,[sp,#8]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w5,w5,w9
+       eor     w13,w13,w3
+       add     w8,w8,w12
+       add     w9,w9,w15
+       and     w12,w6,w5
+       bic     w15,w7,w5
+       eor     w11,w5,w5,ror#5
+       add     w9,w9,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w5,ror#19
+       eor     w15,w9,w9,ror#11
+       add     w8,w8,w12
+       ror     w11,w11,#6
+       eor     w13,w9,w10
+       eor     w15,w15,w9,ror#20
+       add     w8,w8,w11
+       ldr     w12,[sp,#12]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w4,w4,w8
+       eor     w14,w14,w10
+       add     w7,w7,w12
+       add     w8,w8,w15
+       and     w12,w5,w4
+       bic     w15,w6,w4
+       eor     w11,w4,w4,ror#5
+       add     w8,w8,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w4,ror#19
+       eor     w15,w8,w8,ror#11
+       add     w7,w7,w12
+       ror     w11,w11,#6
+       eor     w14,w8,w9
+       eor     w15,w15,w8,ror#20
+       add     w7,w7,w11
+       ldr     w12,[sp,#16]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w3,w3,w7
+       eor     w13,w13,w9
+       st1     {v4.4s},[x17], #16
+       add     w6,w6,w12
+       add     w7,w7,w15
+       and     w12,w4,w3
+       ld1     {v1.16b},[x1],#16
+       bic     w15,w5,w3
+       eor     w11,w3,w3,ror#5
+       ld1     {v4.4s},[x16],#16
+       add     w7,w7,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w3,ror#19
+       eor     w15,w7,w7,ror#11
+       rev32   v1.16b,v1.16b
+       add     w6,w6,w12
+       ror     w11,w11,#6
+       eor     w13,w7,w8
+       eor     w15,w15,w7,ror#20
+       add     v4.4s,v4.4s,v1.4s
+       add     w6,w6,w11
+       ldr     w12,[sp,#20]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w10,w10,w6
+       eor     w14,w14,w8
+       add     w5,w5,w12
+       add     w6,w6,w15
+       and     w12,w3,w10
+       bic     w15,w4,w10
+       eor     w11,w10,w10,ror#5
+       add     w6,w6,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w10,ror#19
+       eor     w15,w6,w6,ror#11
+       add     w5,w5,w12
+       ror     w11,w11,#6
+       eor     w14,w6,w7
+       eor     w15,w15,w6,ror#20
+       add     w5,w5,w11
+       ldr     w12,[sp,#24]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w9,w9,w5
+       eor     w13,w13,w7
+       add     w4,w4,w12
+       add     w5,w5,w15
+       and     w12,w10,w9
+       bic     w15,w3,w9
+       eor     w11,w9,w9,ror#5
+       add     w5,w5,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w9,ror#19
+       eor     w15,w5,w5,ror#11
+       add     w4,w4,w12
+       ror     w11,w11,#6
+       eor     w13,w5,w6
+       eor     w15,w15,w5,ror#20
+       add     w4,w4,w11
+       ldr     w12,[sp,#28]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w8,w8,w4
+       eor     w14,w14,w6
+       add     w3,w3,w12
+       add     w4,w4,w15
+       and     w12,w9,w8
+       bic     w15,w10,w8
+       eor     w11,w8,w8,ror#5
+       add     w4,w4,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w8,ror#19
+       eor     w15,w4,w4,ror#11
+       add     w3,w3,w12
+       ror     w11,w11,#6
+       eor     w14,w4,w5
+       eor     w15,w15,w4,ror#20
+       add     w3,w3,w11
+       ldr     w12,[sp,#32]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w7,w7,w3
+       eor     w13,w13,w5
+       st1     {v4.4s},[x17], #16
+       add     w10,w10,w12
+       add     w3,w3,w15
+       and     w12,w8,w7
+       ld1     {v2.16b},[x1],#16
+       bic     w15,w9,w7
+       eor     w11,w7,w7,ror#5
+       ld1     {v4.4s},[x16],#16
+       add     w3,w3,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w7,ror#19
+       eor     w15,w3,w3,ror#11
+       rev32   v2.16b,v2.16b
+       add     w10,w10,w12
+       ror     w11,w11,#6
+       eor     w13,w3,w4
+       eor     w15,w15,w3,ror#20
+       add     v4.4s,v4.4s,v2.4s
+       add     w10,w10,w11
+       ldr     w12,[sp,#36]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w6,w6,w10
+       eor     w14,w14,w4
+       add     w9,w9,w12
+       add     w10,w10,w15
+       and     w12,w7,w6
+       bic     w15,w8,w6
+       eor     w11,w6,w6,ror#5
+       add     w10,w10,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w6,ror#19
+       eor     w15,w10,w10,ror#11
+       add     w9,w9,w12
+       ror     w11,w11,#6
+       eor     w14,w10,w3
+       eor     w15,w15,w10,ror#20
+       add     w9,w9,w11
+       ldr     w12,[sp,#40]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w5,w5,w9
+       eor     w13,w13,w3
+       add     w8,w8,w12
+       add     w9,w9,w15
+       and     w12,w6,w5
+       bic     w15,w7,w5
+       eor     w11,w5,w5,ror#5
+       add     w9,w9,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w5,ror#19
+       eor     w15,w9,w9,ror#11
+       add     w8,w8,w12
+       ror     w11,w11,#6
+       eor     w13,w9,w10
+       eor     w15,w15,w9,ror#20
+       add     w8,w8,w11
+       ldr     w12,[sp,#44]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w4,w4,w8
+       eor     w14,w14,w10
+       add     w7,w7,w12
+       add     w8,w8,w15
+       and     w12,w5,w4
+       bic     w15,w6,w4
+       eor     w11,w4,w4,ror#5
+       add     w8,w8,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w4,ror#19
+       eor     w15,w8,w8,ror#11
+       add     w7,w7,w12
+       ror     w11,w11,#6
+       eor     w14,w8,w9
+       eor     w15,w15,w8,ror#20
+       add     w7,w7,w11
+       ldr     w12,[sp,#48]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w3,w3,w7
+       eor     w13,w13,w9
+       st1     {v4.4s},[x17], #16
+       add     w6,w6,w12
+       add     w7,w7,w15
+       and     w12,w4,w3
+       ld1     {v3.16b},[x1],#16
+       bic     w15,w5,w3
+       eor     w11,w3,w3,ror#5
+       ld1     {v4.4s},[x16],#16
+       add     w7,w7,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w3,ror#19
+       eor     w15,w7,w7,ror#11
+       rev32   v3.16b,v3.16b
+       add     w6,w6,w12
+       ror     w11,w11,#6
+       eor     w13,w7,w8
+       eor     w15,w15,w7,ror#20
+       add     v4.4s,v4.4s,v3.4s
+       add     w6,w6,w11
+       ldr     w12,[sp,#52]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w10,w10,w6
+       eor     w14,w14,w8
+       add     w5,w5,w12
+       add     w6,w6,w15
+       and     w12,w3,w10
+       bic     w15,w4,w10
+       eor     w11,w10,w10,ror#5
+       add     w6,w6,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w10,ror#19
+       eor     w15,w6,w6,ror#11
+       add     w5,w5,w12
+       ror     w11,w11,#6
+       eor     w14,w6,w7
+       eor     w15,w15,w6,ror#20
+       add     w5,w5,w11
+       ldr     w12,[sp,#56]
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w9,w9,w5
+       eor     w13,w13,w7
+       add     w4,w4,w12
+       add     w5,w5,w15
+       and     w12,w10,w9
+       bic     w15,w3,w9
+       eor     w11,w9,w9,ror#5
+       add     w5,w5,w13
+       orr     w12,w12,w15
+       eor     w11,w11,w9,ror#19
+       eor     w15,w5,w5,ror#11
+       add     w4,w4,w12
+       ror     w11,w11,#6
+       eor     w13,w5,w6
+       eor     w15,w15,w5,ror#20
+       add     w4,w4,w11
+       ldr     w12,[sp,#60]
+       and     w14,w14,w13
+       ror     w15,w15,#2
+       add     w8,w8,w4
+       eor     w14,w14,w6
+       add     w3,w3,w12
+       add     w4,w4,w15
+       and     w12,w9,w8
+       bic     w15,w10,w8
+       eor     w11,w8,w8,ror#5
+       add     w4,w4,w14
+       orr     w12,w12,w15
+       eor     w11,w11,w8,ror#19
+       eor     w15,w4,w4,ror#11
+       add     w3,w3,w12
+       ror     w11,w11,#6
+       eor     w14,w4,w5
+       eor     w15,w15,w4,ror#20
+       add     w3,w3,w11
+       and     w13,w13,w14
+       ror     w15,w15,#2
+       add     w7,w7,w3
+       eor     w13,w13,w5
+       st1     {v4.4s},[x17], #16
+       add     w3,w3,w15                       // h+=Sigma0(a) from the past
+       ldp     w11,w12,[x0,#0]
+       add     w3,w3,w13                       // h+=Maj(a,b,c) from the past
+       ldp     w13,w14,[x0,#8]
+       add     w3,w3,w11                       // accumulate
+       add     w4,w4,w12
+       ldp     w11,w12,[x0,#16]
+       add     w5,w5,w13
+       add     w6,w6,w14
+       ldp     w13,w14,[x0,#24]
+       add     w7,w7,w11
+       add     w8,w8,w12
+        ldr    w12,[sp,#0]
+       stp     w3,w4,[x0,#0]
+       add     w9,w9,w13
+        mov    w13,wzr
+       stp     w5,w6,[x0,#8]
+       add     w10,w10,w14
+       stp     w7,w8,[x0,#16]
+        eor    w14,w4,w5
+       stp     w9,w10,[x0,#24]
+        mov    w15,wzr
+        mov    x17,sp
+       b.ne    .L_00_48
+
+       ldr     x29,[x29]
+       add     sp,sp,#16*4+16
+       ret
+.size  sha256_block_neon,.-sha256_block_neon
+#ifndef        __KERNEL__
+.comm  OPENSSL_armcap_P,4,4
+#endif
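For readers cross-checking the scalar rounds above (and the perlasm that generates them later in this patch), the following is a reference-only C model of a single SHA-256 round in the FIPS 180-4 notation that the inline comments borrow (Sigma1, Ch, Sigma0, Maj). It is not part of the patch and the helper name is mine; the .LK256 table it consumes holds the standard round constants, i.e. the first 32 bits of the fractional parts of the cube roots of the first 64 primes.

#include <stdint.h>

#define ROR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))

/* One SHA-256 round. The rotate counts match the Sigma0=(2,13,22) and
 * Sigma1=(6,11,25) values used by the assembly above; s[] is the a..h
 * working state, Kt the round constant from .LK256, Wt the message word. */
static inline void sha256_round(uint32_t s[8], uint32_t Kt, uint32_t Wt)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

	uint32_t S1  = ROR32(e, 6) ^ ROR32(e, 11) ^ ROR32(e, 25); /* Sigma1(e) */
	uint32_t Ch  = (e & f) ^ (~e & g);   /* the and/bic/orr triple above */
	uint32_t T1  = h + S1 + Ch + Kt + Wt;
	uint32_t S0  = ROR32(a, 2) ^ ROR32(a, 13) ^ ROR32(a, 22); /* Sigma0(a) */
	uint32_t Maj = (a & b) ^ (a & c) ^ (b & c);
	uint32_t T2  = S0 + Maj;

	/* Rotate the working variables: e picks up d+T1, a picks up T1+T2. */
	s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;
	s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + T2;
}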
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
new file mode 100644 (file)
index 0000000..a2226f8
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Linux/arm64 port of the OpenSSL SHA256 implementation for AArch64
+ *
+ * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash for arm64");
+MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha256");
+
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+                                       unsigned int num_blks);
+
+asmlinkage void sha256_block_neon(u32 *digest, const void *data,
+                                 unsigned int num_blks);
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int len)
+{
+       return sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_data_order);
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+                       unsigned int len, u8 *out)
+{
+       if (len)
+               sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_data_order);
+       sha256_base_do_finalize(desc,
+                               (sha256_block_fn *)sha256_block_data_order);
+
+       return sha256_base_finish(desc, out);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+       return sha256_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+       .digestsize             = SHA256_DIGEST_SIZE,
+       .init                   = sha256_base_init,
+       .update                 = sha256_update,
+       .final                  = sha256_final,
+       .finup                  = sha256_finup,
+       .descsize               = sizeof(struct sha256_state),
+       .base.cra_name          = "sha256",
+       .base.cra_driver_name   = "sha256-arm64",
+       .base.cra_priority      = 100,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = SHA256_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+}, {
+       .digestsize             = SHA224_DIGEST_SIZE,
+       .init                   = sha224_base_init,
+       .update                 = sha256_update,
+       .final                  = sha256_final,
+       .finup                  = sha256_finup,
+       .descsize               = sizeof(struct sha256_state),
+       .base.cra_name          = "sha224",
+       .base.cra_driver_name   = "sha224-arm64",
+       .base.cra_priority      = 100,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = SHA224_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+} };
+
+static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
+                             unsigned int len)
+{
+       /*
+        * Stacking and unstacking a substantial slice of the NEON register
+        * file may significantly affect performance for small updates when
+        * executing in interrupt context, so fall back to the scalar code
+        * in that case.
+        */
+       if (!may_use_simd())
+               return sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_data_order);
+
+       kernel_neon_begin();
+       sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_neon);
+       kernel_neon_end();
+
+       return 0;
+}
+
+static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
+                            unsigned int len, u8 *out)
+{
+       if (!may_use_simd()) {
+               if (len)
+                       sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_data_order);
+               sha256_base_do_finalize(desc,
+                               (sha256_block_fn *)sha256_block_data_order);
+       } else {
+               kernel_neon_begin();
+               if (len)
+                       sha256_base_do_update(desc, data, len,
+                               (sha256_block_fn *)sha256_block_neon);
+               sha256_base_do_finalize(desc,
+                               (sha256_block_fn *)sha256_block_neon);
+               kernel_neon_end();
+       }
+       return sha256_base_finish(desc, out);
+}
+
+static int sha256_final_neon(struct shash_desc *desc, u8 *out)
+{
+       return sha256_finup_neon(desc, NULL, 0, out);
+}
+
+static struct shash_alg neon_algs[] = { {
+       .digestsize             = SHA256_DIGEST_SIZE,
+       .init                   = sha256_base_init,
+       .update                 = sha256_update_neon,
+       .final                  = sha256_final_neon,
+       .finup                  = sha256_finup_neon,
+       .descsize               = sizeof(struct sha256_state),
+       .base.cra_name          = "sha256",
+       .base.cra_driver_name   = "sha256-arm64-neon",
+       .base.cra_priority      = 150,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = SHA256_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+}, {
+       .digestsize             = SHA224_DIGEST_SIZE,
+       .init                   = sha224_base_init,
+       .update                 = sha256_update_neon,
+       .final                  = sha256_final_neon,
+       .finup                  = sha256_finup_neon,
+       .descsize               = sizeof(struct sha256_state),
+       .base.cra_name          = "sha224",
+       .base.cra_driver_name   = "sha224-arm64-neon",
+       .base.cra_priority      = 150,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = SHA224_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+} };
+
+static int __init sha256_mod_init(void)
+{
+       int ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+       if (ret)
+               return ret;
+
+       if (elf_hwcap & HWCAP_ASIMD) {
+               ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
+               if (ret)
+                       crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+       }
+       return ret;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+       if (elf_hwcap & HWCAP_ASIMD)
+               crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
+       crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
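The glue code above registers "sha256"/"sha224" shash implementations at priority 100 (scalar) and 150 (NEON, when HWCAP_ASIMD is set), so in-kernel users pick up the fastest variant transparently through the generic crypto API. As a minimal usage sketch under that assumption (not part of the patch; the function name is hypothetical, and the desc->flags assignment applies to kernels of this era, where struct shash_desc still has a flags field):

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

/* Hypothetical caller: digest a buffer with whichever "sha256" provider has
 * the highest priority (the NEON driver above on ASIMD-capable CPUs). */
static int example_sha256_digest(const u8 *data, unsigned int len,
				 u8 out[SHA256_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	{
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;	/* no CRYPTO_TFM_REQ_MAY_SLEEP etc. */
		err = crypto_shash_digest(desc, data, len, out);
	}

	crypto_free_shash(tfm);
	return err;
}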
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
new file mode 100644 (file)
index 0000000..c55efb3
--- /dev/null
@@ -0,0 +1,778 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPLv2 terms is granted.
+# ====================================================================
+#
+# SHA256/512 for ARMv8.
+#
+# Performance in cycles per processed byte and improvement coefficient
+# over code generated with "default" compiler:
+#
+#              SHA256-hw       SHA256(*)       SHA512
+# Apple A7     1.97            10.5 (+33%)     6.73 (-1%(**))
+# Cortex-A53   2.38            15.5 (+115%)    10.0 (+150%(***))
+# Cortex-A57   2.31            11.6 (+86%)     7.51 (+260%(***))
+# Denver       2.01            10.5 (+26%)     6.70 (+8%)
+# X-Gene                       20.0 (+100%)    12.8 (+300%(***))
+# Mongoose     2.36            13.0 (+50%)     8.36 (+33%)
+#
+# (*)  Software SHA256 results are of lesser relevance, presented
+#      mostly for informational purposes.
+# (**) The result is a trade-off: it's possible to improve it by
+#      10% (or by 1 cycle per round), but at the cost of 20% loss
+#      on Cortex-A53 (or by 4 cycles per round).
+# (***)	Super-impressive coefficients over gcc-generated code are an
+#	indication of some compiler "pathology"; most notably, code
+#	generated with -mgeneral-regs-only is significantly faster
+#	and the gap is only 40-90%.
+#
+# October 2016.
+#
+# Originally it was reckoned that it makes no sense to implement a NEON
+# version of SHA256 for 64-bit processors. This is because the performance
+# improvement on the most widespread Cortex-A5x processors was observed
+# to be marginal: about the same on Cortex-A53 and ~10% on A57. But then it
+# was observed that 32-bit NEON SHA256 performs significantly better than
+# the 64-bit scalar version on *some* of the more recent processors. As a
+# result, a 64-bit NEON version of SHA256 was added to provide the best
+# all-round performance. For example, it executes ~30% faster on X-Gene
+# and Mongoose. [For reference, the NEON version of SHA512 is bound to
+# deliver much less improvement, likely *negative* on Cortex-A5x,
+# which is why NEON support is limited to SHA256.]
+
+$output=pop;
+$flavour=pop;
+
+if ($flavour && $flavour ne "void") {
+    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+    die "can't locate arm-xlate.pl";
+
+    open OUT,"| \"$^X\" $xlate $flavour $output";
+    *STDOUT=*OUT;
+} else {
+    open STDOUT,">$output";
+}
+
+if ($output =~ /512/) {
+       $BITS=512;
+       $SZ=8;
+       @Sigma0=(28,34,39);
+       @Sigma1=(14,18,41);
+       @sigma0=(1,  8, 7);
+       @sigma1=(19,61, 6);
+       $rounds=80;
+       $reg_t="x";
+} else {
+       $BITS=256;
+       $SZ=4;
+       @Sigma0=( 2,13,22);
+       @Sigma1=( 6,11,25);
+       @sigma0=( 7,18, 3);
+       @sigma1=(17,19,10);
+       $rounds=64;
+       $reg_t="w";
+}
+
+$func="sha${BITS}_block_data_order";
+
+($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));
+
+@X=map("$reg_t$_",(3..15,0..2));
+@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27));
+($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28));
+
+sub BODY_00_xx {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my $j=($i+1)&15;
+my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
+   $T0=@X[$i+3] if ($i<11);
+
+$code.=<<___   if ($i<16);
+#ifndef        __AARCH64EB__
+       rev     @X[$i],@X[$i]                   // $i
+#endif
+___
+$code.=<<___   if ($i<13 && ($i&1));
+       ldp     @X[$i+1],@X[$i+2],[$inp],#2*$SZ
+___
+$code.=<<___   if ($i==13);
+       ldp     @X[14],@X[15],[$inp]
+___
+$code.=<<___   if ($i>=14);
+       ldr     @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`]
+___
+$code.=<<___   if ($i>0 && $i<16);
+       add     $a,$a,$t1                       // h+=Sigma0(a)
+___
+$code.=<<___   if ($i>=11);
+       str     @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`]
+___
+# While ARMv8 specifies merged rotate-and-logical operations such as
+# 'eor x,y,z,ror#n', they were found to negatively affect performance
+# on Apple A7. The reason seems to be that they require even 'y' to
+# be available earlier. This means that such a merged instruction is
+# not necessarily the best choice on the critical path... On the other
+# hand, Cortex-A5x handles merged instructions much better than disjoint
+# rotate and logical... See the (**) footnote above.
+$code.=<<___   if ($i<15);
+       ror     $t0,$e,#$Sigma1[0]
+       add     $h,$h,$t2                       // h+=K[i]
+       eor     $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]`
+       and     $t1,$f,$e
+       bic     $t2,$g,$e
+       add     $h,$h,@X[$i&15]                 // h+=X[i]
+       orr     $t1,$t1,$t2                     // Ch(e,f,g)
+       eor     $t2,$a,$b                       // a^b, b^c in next round
+       eor     $t0,$t0,$T0,ror#$Sigma1[1]      // Sigma1(e)
+       ror     $T0,$a,#$Sigma0[0]
+       add     $h,$h,$t1                       // h+=Ch(e,f,g)
+       eor     $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]`
+       add     $h,$h,$t0                       // h+=Sigma1(e)
+       and     $t3,$t3,$t2                     // (b^c)&=(a^b)
+       add     $d,$d,$h                        // d+=h
+       eor     $t3,$t3,$b                      // Maj(a,b,c)
+       eor     $t1,$T0,$t1,ror#$Sigma0[1]      // Sigma0(a)
+       add     $h,$h,$t3                       // h+=Maj(a,b,c)
+       ldr     $t3,[$Ktbl],#$SZ                // *K++, $t2 in next round
+       //add   $h,$h,$t1                       // h+=Sigma0(a)
+___
+$code.=<<___   if ($i>=15);
+       ror     $t0,$e,#$Sigma1[0]
+       add     $h,$h,$t2                       // h+=K[i]
+       ror     $T1,@X[($j+1)&15],#$sigma0[0]
+       and     $t1,$f,$e
+       ror     $T2,@X[($j+14)&15],#$sigma1[0]
+       bic     $t2,$g,$e
+       ror     $T0,$a,#$Sigma0[0]
+       add     $h,$h,@X[$i&15]                 // h+=X[i]
+       eor     $t0,$t0,$e,ror#$Sigma1[1]
+       eor     $T1,$T1,@X[($j+1)&15],ror#$sigma0[1]
+       orr     $t1,$t1,$t2                     // Ch(e,f,g)
+       eor     $t2,$a,$b                       // a^b, b^c in next round
+       eor     $t0,$t0,$e,ror#$Sigma1[2]       // Sigma1(e)
+       eor     $T0,$T0,$a,ror#$Sigma0[1]
+       add     $h,$h,$t1                       // h+=Ch(e,f,g)
+       and     $t3,$t3,$t2                     // (b^c)&=(a^b)
+       eor     $T2,$T2,@X[($j+14)&15],ror#$sigma1[1]
+       eor     $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2]    // sigma0(X[i+1])
+       add     $h,$h,$t0                       // h+=Sigma1(e)
+       eor     $t3,$t3,$b                      // Maj(a,b,c)
+       eor     $t1,$T0,$a,ror#$Sigma0[2]       // Sigma0(a)
+       eor     $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2]   // sigma1(X[i+14])
+       add     @X[$j],@X[$j],@X[($j+9)&15]
+       add     $d,$d,$h                        // d+=h
+       add     $h,$h,$t3                       // h+=Maj(a,b,c)
+       ldr     $t3,[$Ktbl],#$SZ                // *K++, $t2 in next round
+       add     @X[$j],@X[$j],$T1
+       add     $h,$h,$t1                       // h+=Sigma0(a)
+       add     @X[$j],@X[$j],$T2
+___
+       ($t2,$t3)=($t3,$t2);
+}
+
+$code.=<<___;
+#ifndef        __KERNEL__
+# include "arm_arch.h"
+#endif
+
+.text
+
+.extern        OPENSSL_armcap_P
+.globl $func
+.type  $func,%function
+.align 6
+$func:
+___
+$code.=<<___   if ($SZ==4);
+#ifndef        __KERNEL__
+# ifdef        __ILP32__
+       ldrsw   x16,.LOPENSSL_armcap_P
+# else
+       ldr     x16,.LOPENSSL_armcap_P
+# endif
+       adr     x17,.LOPENSSL_armcap_P
+       add     x16,x16,x17
+       ldr     w16,[x16]
+       tst     w16,#ARMV8_SHA256
+       b.ne    .Lv8_entry
+       tst     w16,#ARMV7_NEON
+       b.ne    .Lneon_entry
+#endif
+___
+$code.=<<___;
+       stp     x29,x30,[sp,#-128]!
+       add     x29,sp,#0
+
+       stp     x19,x20,[sp,#16]
+       stp     x21,x22,[sp,#32]
+       stp     x23,x24,[sp,#48]
+       stp     x25,x26,[sp,#64]
+       stp     x27,x28,[sp,#80]
+       sub     sp,sp,#4*$SZ
+
+       ldp     $A,$B,[$ctx]                            // load context
+       ldp     $C,$D,[$ctx,#2*$SZ]
+       ldp     $E,$F,[$ctx,#4*$SZ]
+       add     $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input
+       ldp     $G,$H,[$ctx,#6*$SZ]
+       adr     $Ktbl,.LK$BITS
+       stp     $ctx,$num,[x29,#96]
+
+.Loop:
+       ldp     @X[0],@X[1],[$inp],#2*$SZ
+       ldr     $t2,[$Ktbl],#$SZ                        // *K++
+       eor     $t3,$B,$C                               // magic seed
+       str     $inp,[x29,#112]
+___
+for ($i=0;$i<16;$i++)  { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=".Loop_16_xx:\n";
+for (;$i<32;$i++)      { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+       cbnz    $t2,.Loop_16_xx
+
+       ldp     $ctx,$num,[x29,#96]
+       ldr     $inp,[x29,#112]
+       sub     $Ktbl,$Ktbl,#`$SZ*($rounds+1)`          // rewind
+
+       ldp     @X[0],@X[1],[$ctx]
+       ldp     @X[2],@X[3],[$ctx,#2*$SZ]
+       add     $inp,$inp,#14*$SZ                       // advance input pointer
+       ldp     @X[4],@X[5],[$ctx,#4*$SZ]
+       add     $A,$A,@X[0]
+       ldp     @X[6],@X[7],[$ctx,#6*$SZ]
+       add     $B,$B,@X[1]
+       add     $C,$C,@X[2]
+       add     $D,$D,@X[3]
+       stp     $A,$B,[$ctx]
+       add     $E,$E,@X[4]
+       add     $F,$F,@X[5]
+       stp     $C,$D,[$ctx,#2*$SZ]
+       add     $G,$G,@X[6]
+       add     $H,$H,@X[7]
+       cmp     $inp,$num
+       stp     $E,$F,[$ctx,#4*$SZ]
+       stp     $G,$H,[$ctx,#6*$SZ]
+       b.ne    .Loop
+
+       ldp     x19,x20,[x29,#16]
+       add     sp,sp,#4*$SZ
+       ldp     x21,x22,[x29,#32]
+       ldp     x23,x24,[x29,#48]
+       ldp     x25,x26,[x29,#64]
+       ldp     x27,x28,[x29,#80]
+       ldp     x29,x30,[sp],#128
+       ret
+.size  $func,.-$func
+
+.align 6
+.type  .LK$BITS,%object
+.LK$BITS:
+___
+$code.=<<___ if ($SZ==8);
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+       .quad   0       // terminator
+___
+$code.=<<___ if ($SZ==4);
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+       .long   0       //terminator
+___
+$code.=<<___;
+.size  .LK$BITS,.-.LK$BITS
+#ifndef        __KERNEL__
+.align 3
+.LOPENSSL_armcap_P:
+# ifdef        __ILP32__
+       .long   OPENSSL_armcap_P-.
+# else
+       .quad   OPENSSL_armcap_P-.
+# endif
+#endif
+.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+if ($SZ==4) {
+my $Ktbl="x3";
+
+my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2));
+my @MSG=map("v$_.16b",(4..7));
+my ($W0,$W1)=("v16.4s","v17.4s");
+my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b");
+
+$code.=<<___;
+#ifndef        __KERNEL__
+.type  sha256_block_armv8,%function
+.align 6
+sha256_block_armv8:
+.Lv8_entry:
+       stp             x29,x30,[sp,#-16]!
+       add             x29,sp,#0
+
+       ld1.32          {$ABCD,$EFGH},[$ctx]
+       adr             $Ktbl,.LK256
+
+.Loop_hw:
+       ld1             {@MSG[0]-@MSG[3]},[$inp],#64
+       sub             $num,$num,#1
+       ld1.32          {$W0},[$Ktbl],#16
+       rev32           @MSG[0],@MSG[0]
+       rev32           @MSG[1],@MSG[1]
+       rev32           @MSG[2],@MSG[2]
+       rev32           @MSG[3],@MSG[3]
+       orr             $ABCD_SAVE,$ABCD,$ABCD          // offload
+       orr             $EFGH_SAVE,$EFGH,$EFGH
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+       ld1.32          {$W1},[$Ktbl],#16
+       add.i32         $W0,$W0,@MSG[0]
+       sha256su0       @MSG[0],@MSG[1]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+       sha256su1       @MSG[0],@MSG[2],@MSG[3]
+___
+       ($W0,$W1)=($W1,$W0);    push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+       ld1.32          {$W1},[$Ktbl],#16
+       add.i32         $W0,$W0,@MSG[0]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+
+       ld1.32          {$W0},[$Ktbl],#16
+       add.i32         $W1,$W1,@MSG[1]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W1
+       sha256h2        $EFGH,$abcd,$W1
+
+       ld1.32          {$W1},[$Ktbl]
+       add.i32         $W0,$W0,@MSG[2]
+       sub             $Ktbl,$Ktbl,#$rounds*$SZ-16     // rewind
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W0
+       sha256h2        $EFGH,$abcd,$W0
+
+       add.i32         $W1,$W1,@MSG[3]
+       orr             $abcd,$ABCD,$ABCD
+       sha256h         $ABCD,$EFGH,$W1
+       sha256h2        $EFGH,$abcd,$W1
+
+       add.i32         $ABCD,$ABCD,$ABCD_SAVE
+       add.i32         $EFGH,$EFGH,$EFGH_SAVE
+
+       cbnz            $num,.Loop_hw
+
+       st1.32          {$ABCD,$EFGH},[$ctx]
+
+       ldr             x29,[sp],#16
+       ret
+.size  sha256_block_armv8,.-sha256_block_armv8
+#endif
+___
+}
+
+if ($SZ==4) {  ######################################### NEON stuff #
+# You'll surely note a lot of similarities with the sha256-armv4 module,
+# and of course that's not a coincidence: sha256-armv4 was used as the
+# initial template, but was adapted to the ARMv8 instruction set and
+# extensively re-tuned for all-round performance.
+
+my @V = ($A,$B,$C,$D,$E,$F,$G,$H) = map("w$_",(3..10));
+my ($t0,$t1,$t2,$t3,$t4) = map("w$_",(11..15));
+my $Ktbl="x16";
+my $Xfer="x17";
+my @X = map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7) = map("q$_",(4..7,16..19));
+my $j=0;
+
+sub AUTOLOAD()          # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+  my $arg = pop;
+    $arg = "#$arg" if ($arg*1 eq $arg);
+    $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Dscalar { shift =~ m|[qv]([0-9]+)|?"d$1":""; }
+sub Dlo     { shift =~ m|[qv]([0-9]+)|?"v$1.d[0]":""; }
+sub Dhi     { shift =~ m|[qv]([0-9]+)|?"v$1.d[1]":""; }
+
+sub Xupdate()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+       &ext_8          ($T0,@X[0],@X[1],4);    # X[1..4]
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &ext_8          ($T3,@X[2],@X[3],4);    # X[9..12]
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &mov            (&Dscalar($T7),&Dhi(@X[3]));    # X[14..15]
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &ushr_32        ($T2,$T0,$sigma0[0]);
+        eval(shift(@insns));
+       &ushr_32        ($T1,$T0,$sigma0[2]);
+        eval(shift(@insns));
+       &add_32         (@X[0],@X[0],$T3);      # X[0..3] += X[9..12]
+        eval(shift(@insns));
+       &sli_32         ($T2,$T0,32-$sigma0[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &ushr_32        ($T3,$T0,$sigma0[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &eor_8          ($T1,$T1,$T2);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &sli_32         ($T3,$T0,32-$sigma0[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &ushr_32      ($T4,$T7,$sigma1[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &eor_8          ($T1,$T1,$T3);          # sigma0(X[1..4])
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &sli_32       ($T4,$T7,32-$sigma1[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &ushr_32      ($T5,$T7,$sigma1[2]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &ushr_32      ($T3,$T7,$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &add_32         (@X[0],@X[0],$T1);      # X[0..3] += sigma0(X[1..4])
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &sli_u32      ($T3,$T7,32-$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &eor_8        ($T5,$T5,$T4);
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &eor_8        ($T5,$T5,$T3);          # sigma1(X[14..15])
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &add_32         (@X[0],@X[0],$T5);      # X[0..1] += sigma1(X[14..15])
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &ushr_32      ($T6,@X[0],$sigma1[0]);
+        eval(shift(@insns));
+         &ushr_32      ($T7,@X[0],$sigma1[2]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &sli_32       ($T6,@X[0],32-$sigma1[0]);
+        eval(shift(@insns));
+         &ushr_32      ($T5,@X[0],$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &eor_8        ($T7,$T7,$T6);
+        eval(shift(@insns));
+        eval(shift(@insns));
+         &sli_32       ($T5,@X[0],32-$sigma1[1]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &ld1_32         ("{$T0}","[$Ktbl], #16");
+        eval(shift(@insns));
+         &eor_8        ($T7,$T7,$T5);          # sigma1(X[16..17])
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &eor_8          ($T5,$T5,$T5);
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &mov            (&Dhi($T5), &Dlo($T7));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &add_32         (@X[0],@X[0],$T5);      # X[2..3] += sigma1(X[16..17])
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &add_32         ($T0,$T0,@X[0]);
+        while($#insns>=1) { eval(shift(@insns)); }
+       &st1_32         ("{$T0}","[$Xfer], #16");
+        eval(shift(@insns));
+
+       push(@X,shift(@X));             # "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+  my $body = shift;
+  my @insns = (&$body,&$body,&$body,&$body);
+  my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &ld1_8          ("{@X[0]}","[$inp],#16");
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &ld1_32         ("{$T0}","[$Ktbl],#16");
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &rev32          (@X[0],@X[0]);
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+        eval(shift(@insns));
+       &add_32         ($T0,$T0,@X[0]);
+        foreach (@insns) { eval; }     # remaining instructions
+       &st1_32         ("{$T0}","[$Xfer], #16");
+
+       push(@X,shift(@X));             # "rotate" X[]
+}
+
+sub body_00_15 () {
+       (
+       '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+       '&add   ($h,$h,$t1)',                   # h+=X[i]+K[i]
+       '&add   ($a,$a,$t4);'.                  # h+=Sigma0(a) from the past
+       '&and   ($t1,$f,$e)',
+       '&bic   ($t4,$g,$e)',
+       '&eor   ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+       '&add   ($a,$a,$t2)',                   # h+=Maj(a,b,c) from the past
+       '&orr   ($t1,$t1,$t4)',                 # Ch(e,f,g)
+       '&eor   ($t0,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',   # Sigma1(e)
+       '&eor   ($t4,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+       '&add   ($h,$h,$t1)',                   # h+=Ch(e,f,g)
+       '&ror   ($t0,$t0,"#$Sigma1[0]")',
+       '&eor   ($t2,$a,$b)',                   # a^b, b^c in next round
+       '&eor   ($t4,$t4,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',   # Sigma0(a)
+       '&add   ($h,$h,$t0)',                   # h+=Sigma1(e)
+       '&ldr   ($t1,sprintf "[sp,#%d]",4*(($j+1)&15))  if (($j&15)!=15);'.
+       '&ldr   ($t1,"[$Ktbl]")                         if ($j==15);'.
+       '&and   ($t3,$t3,$t2)',                 # (b^c)&=(a^b)
+       '&ror   ($t4,$t4,"#$Sigma0[0]")',
+       '&add   ($d,$d,$h)',                    # d+=h
+       '&eor   ($t3,$t3,$b)',                  # Maj(a,b,c)
+       '$j++;  unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+       )
+}
+
+$code.=<<___;
+#ifdef __KERNEL__
+.globl sha256_block_neon
+#endif
+.type  sha256_block_neon,%function
+.align 4
+sha256_block_neon:
+.Lneon_entry:
+       stp     x29, x30, [sp, #-16]!
+       mov     x29, sp
+       sub     sp,sp,#16*4
+
+       adr     $Ktbl,.LK256
+       add     $num,$inp,$num,lsl#6    // len to point at the end of inp
+
+       ld1.8   {@X[0]},[$inp], #16
+       ld1.8   {@X[1]},[$inp], #16
+       ld1.8   {@X[2]},[$inp], #16
+       ld1.8   {@X[3]},[$inp], #16
+       ld1.32  {$T0},[$Ktbl], #16
+       ld1.32  {$T1},[$Ktbl], #16
+       ld1.32  {$T2},[$Ktbl], #16
+       ld1.32  {$T3},[$Ktbl], #16
+       rev32   @X[0],@X[0]             // yes, even on
+       rev32   @X[1],@X[1]             // big-endian
+       rev32   @X[2],@X[2]
+       rev32   @X[3],@X[3]
+       mov     $Xfer,sp
+       add.32  $T0,$T0,@X[0]
+       add.32  $T1,$T1,@X[1]
+       add.32  $T2,$T2,@X[2]
+       st1.32  {$T0-$T1},[$Xfer], #32
+       add.32  $T3,$T3,@X[3]
+       st1.32  {$T2-$T3},[$Xfer]
+       sub     $Xfer,$Xfer,#32
+
+       ldp     $A,$B,[$ctx]
+       ldp     $C,$D,[$ctx,#8]
+       ldp     $E,$F,[$ctx,#16]
+       ldp     $G,$H,[$ctx,#24]
+       ldr     $t1,[sp,#0]
+       mov     $t2,wzr
+       eor     $t3,$B,$C
+       mov     $t4,wzr
+       b       .L_00_48
+
+.align 4
+.L_00_48:
+___
+       &Xupdate(\&body_00_15);
+       &Xupdate(\&body_00_15);
+       &Xupdate(\&body_00_15);
+       &Xupdate(\&body_00_15);
+$code.=<<___;
+       cmp     $t1,#0                          // check for K256 terminator
+       ldr     $t1,[sp,#0]
+       sub     $Xfer,$Xfer,#64
+       bne     .L_00_48
+
+       sub     $Ktbl,$Ktbl,#256                // rewind $Ktbl
+       cmp     $inp,$num
+       mov     $Xfer, #64
+       csel    $Xfer, $Xfer, xzr, eq
+       sub     $inp,$inp,$Xfer                 // avoid SEGV
+       mov     $Xfer,sp
+___
+       &Xpreload(\&body_00_15);
+       &Xpreload(\&body_00_15);
+       &Xpreload(\&body_00_15);
+       &Xpreload(\&body_00_15);
+$code.=<<___;
+       add     $A,$A,$t4                       // h+=Sigma0(a) from the past
+       ldp     $t0,$t1,[$ctx,#0]
+       add     $A,$A,$t2                       // h+=Maj(a,b,c) from the past
+       ldp     $t2,$t3,[$ctx,#8]
+       add     $A,$A,$t0                       // accumulate
+       add     $B,$B,$t1
+       ldp     $t0,$t1,[$ctx,#16]
+       add     $C,$C,$t2
+       add     $D,$D,$t3
+       ldp     $t2,$t3,[$ctx,#24]
+       add     $E,$E,$t0
+       add     $F,$F,$t1
+        ldr    $t1,[sp,#0]
+       stp     $A,$B,[$ctx,#0]
+       add     $G,$G,$t2
+        mov    $t2,wzr
+       stp     $C,$D,[$ctx,#8]
+       add     $H,$H,$t3
+       stp     $E,$F,[$ctx,#16]
+        eor    $t3,$B,$C
+       stp     $G,$H,[$ctx,#24]
+        mov    $t4,wzr
+        mov    $Xfer,sp
+       b.ne    .L_00_48
+
+       ldr     x29,[x29]
+       add     sp,sp,#16*4+16
+       ret
+.size  sha256_block_neon,.-sha256_block_neon
+___
+}
+
+$code.=<<___;
+#ifndef        __KERNEL__
+.comm  OPENSSL_armcap_P,4,4
+#endif
+___
+
+{   my  %opcode = (
+       "sha256h"       => 0x5e004000,  "sha256h2"      => 0x5e005000,
+       "sha256su0"     => 0x5e282800,  "sha256su1"     => 0x5e006000   );
+
+    sub unsha256 {
+       my ($mnemonic,$arg)=@_;
+
+       $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+       &&
+       sprintf ".inst\t0x%08x\t//%s %s",
+                       $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+                       $mnemonic,$arg;
+    }
+}
+
+open SELF,$0;
+while(<SELF>) {
+        next if (/^#!/);
+        last if (!s/^#/\/\// and !/^$/);
+        print;
+}
+close SELF;
+
+foreach(split("\n",$code)) {
+
+       s/\`([^\`]*)\`/eval($1)/ge;
+
+       s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;
+
+       s/\bq([0-9]+)\b/v$1.16b/g;              # old->new registers
+
+       s/\.[ui]?8(\s)/$1/;
+       s/\.\w?32\b//           and s/\.16b/\.4s/g;
+       m/(ld|st)1[^\[]+\[0\]/  and s/\.4s/\.s/g;
+
+       print $_,"\n";
+}
+
+close STDOUT;
diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped
new file mode 100644 (file)
index 0000000..bd0f59f
--- /dev/null
@@ -0,0 +1,1085 @@
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+//             SHA256-hw       SHA256(*)       SHA512
+// Apple A7    1.97            10.5 (+33%)     6.73 (-1%(**))
+// Cortex-A53  2.38            15.5 (+115%)    10.0 (+150%(***))
+// Cortex-A57  2.31            11.6 (+86%)     7.51 (+260%(***))
+// Denver      2.01            10.5 (+26%)     6.70 (+8%)
+// X-Gene                      20.0 (+100%)    12.8 (+300%(***))
+// Mongoose    2.36            13.0 (+50%)     8.36 (+33%)
+//
+// (*) Software SHA256 results are of lesser relevance, presented
+//     mostly for informational purposes.
+// (**)        The result is a trade-off: it's possible to improve it by
+//     10% (or by 1 cycle per round), but at the cost of 20% loss
+//     on Cortex-A53 (or by 4 cycles per round).
+// (***)       Super-impressive coefficients over gcc-generated code are
+//     an indication of some compiler "pathology"; most notably, code
+//     generated with -mgeneral-regs-only is significantly faster
+//     and the gap is only 40-90%.
+//
+// October 2016.
+//
+// Originally it was reckoned that it made no sense to implement a NEON
+// version of SHA256 for 64-bit processors, because the performance
+// improvement on the most widespread Cortex-A5x processors was observed
+// to be marginal: the same on Cortex-A53 and ~10% on A57. But it was
+// then observed that 32-bit NEON SHA256 performs significantly better
+// than the 64-bit scalar version on *some* of the more recent
+// processors. As a result, a 64-bit NEON version of SHA256 was added to
+// provide the best all-round performance; for example, it executes ~30%
+// faster on X-Gene and Mongoose. [For reference, a NEON version of
+// SHA512 is bound to deliver much less improvement, likely *negative*
+// on Cortex-A5x, which is why NEON support is limited to SHA256.]
+
+#ifndef        __KERNEL__
+# include "arm_arch.h"
+#endif
+
+.text
+
+.extern        OPENSSL_armcap_P
+.globl sha512_block_data_order
+.type  sha512_block_data_order,%function
+.align 6
+sha512_block_data_order:
+       stp     x29,x30,[sp,#-128]!
+       add     x29,sp,#0
+
+       stp     x19,x20,[sp,#16]
+       stp     x21,x22,[sp,#32]
+       stp     x23,x24,[sp,#48]
+       stp     x25,x26,[sp,#64]
+       stp     x27,x28,[sp,#80]
+       sub     sp,sp,#4*8
+
+       ldp     x20,x21,[x0]                            // load context
+       ldp     x22,x23,[x0,#2*8]
+       ldp     x24,x25,[x0,#4*8]
+       add     x2,x1,x2,lsl#7  // end of input
+       ldp     x26,x27,[x0,#6*8]
+       adr     x30,.LK512
+       stp     x0,x2,[x29,#96]
+
+.Loop:
+       ldp     x3,x4,[x1],#2*8
+       ldr     x19,[x30],#8                    // *K++
+       eor     x28,x21,x22                             // magic seed
+       str     x1,[x29,#112]
+#ifndef        __AARCH64EB__
+       rev     x3,x3                   // 0
+#endif
+       ror     x16,x24,#14
+       add     x27,x27,x19                     // h+=K[i]
+       eor     x6,x24,x24,ror#23
+       and     x17,x25,x24
+       bic     x19,x26,x24
+       add     x27,x27,x3                      // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x20,x21                     // a^b, b^c in next round
+       eor     x16,x16,x6,ror#18       // Sigma1(e)
+       ror     x6,x20,#28
+       add     x27,x27,x17                     // h+=Ch(e,f,g)
+       eor     x17,x20,x20,ror#5
+       add     x27,x27,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x23,x23,x27                     // d+=h
+       eor     x28,x28,x21                     // Maj(a,b,c)
+       eor     x17,x6,x17,ror#34       // Sigma0(a)
+       add     x27,x27,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x27,x27,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x4,x4                   // 1
+#endif
+       ldp     x5,x6,[x1],#2*8
+       add     x27,x27,x17                     // h+=Sigma0(a)
+       ror     x16,x23,#14
+       add     x26,x26,x28                     // h+=K[i]
+       eor     x7,x23,x23,ror#23
+       and     x17,x24,x23
+       bic     x28,x25,x23
+       add     x26,x26,x4                      // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x27,x20                     // a^b, b^c in next round
+       eor     x16,x16,x7,ror#18       // Sigma1(e)
+       ror     x7,x27,#28
+       add     x26,x26,x17                     // h+=Ch(e,f,g)
+       eor     x17,x27,x27,ror#5
+       add     x26,x26,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x22,x22,x26                     // d+=h
+       eor     x19,x19,x20                     // Maj(a,b,c)
+       eor     x17,x7,x17,ror#34       // Sigma0(a)
+       add     x26,x26,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x26,x26,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x5,x5                   // 2
+#endif
+       add     x26,x26,x17                     // h+=Sigma0(a)
+       ror     x16,x22,#14
+       add     x25,x25,x19                     // h+=K[i]
+       eor     x8,x22,x22,ror#23
+       and     x17,x23,x22
+       bic     x19,x24,x22
+       add     x25,x25,x5                      // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x26,x27                     // a^b, b^c in next round
+       eor     x16,x16,x8,ror#18       // Sigma1(e)
+       ror     x8,x26,#28
+       add     x25,x25,x17                     // h+=Ch(e,f,g)
+       eor     x17,x26,x26,ror#5
+       add     x25,x25,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x21,x21,x25                     // d+=h
+       eor     x28,x28,x27                     // Maj(a,b,c)
+       eor     x17,x8,x17,ror#34       // Sigma0(a)
+       add     x25,x25,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x25,x25,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x6,x6                   // 3
+#endif
+       ldp     x7,x8,[x1],#2*8
+       add     x25,x25,x17                     // h+=Sigma0(a)
+       ror     x16,x21,#14
+       add     x24,x24,x28                     // h+=K[i]
+       eor     x9,x21,x21,ror#23
+       and     x17,x22,x21
+       bic     x28,x23,x21
+       add     x24,x24,x6                      // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x25,x26                     // a^b, b^c in next round
+       eor     x16,x16,x9,ror#18       // Sigma1(e)
+       ror     x9,x25,#28
+       add     x24,x24,x17                     // h+=Ch(e,f,g)
+       eor     x17,x25,x25,ror#5
+       add     x24,x24,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x20,x20,x24                     // d+=h
+       eor     x19,x19,x26                     // Maj(a,b,c)
+       eor     x17,x9,x17,ror#34       // Sigma0(a)
+       add     x24,x24,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x24,x24,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x7,x7                   // 4
+#endif
+       add     x24,x24,x17                     // h+=Sigma0(a)
+       ror     x16,x20,#14
+       add     x23,x23,x19                     // h+=K[i]
+       eor     x10,x20,x20,ror#23
+       and     x17,x21,x20
+       bic     x19,x22,x20
+       add     x23,x23,x7                      // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x24,x25                     // a^b, b^c in next round
+       eor     x16,x16,x10,ror#18      // Sigma1(e)
+       ror     x10,x24,#28
+       add     x23,x23,x17                     // h+=Ch(e,f,g)
+       eor     x17,x24,x24,ror#5
+       add     x23,x23,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x27,x27,x23                     // d+=h
+       eor     x28,x28,x25                     // Maj(a,b,c)
+       eor     x17,x10,x17,ror#34      // Sigma0(a)
+       add     x23,x23,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x23,x23,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x8,x8                   // 5
+#endif
+       ldp     x9,x10,[x1],#2*8
+       add     x23,x23,x17                     // h+=Sigma0(a)
+       ror     x16,x27,#14
+       add     x22,x22,x28                     // h+=K[i]
+       eor     x11,x27,x27,ror#23
+       and     x17,x20,x27
+       bic     x28,x21,x27
+       add     x22,x22,x8                      // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x23,x24                     // a^b, b^c in next round
+       eor     x16,x16,x11,ror#18      // Sigma1(e)
+       ror     x11,x23,#28
+       add     x22,x22,x17                     // h+=Ch(e,f,g)
+       eor     x17,x23,x23,ror#5
+       add     x22,x22,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x26,x26,x22                     // d+=h
+       eor     x19,x19,x24                     // Maj(a,b,c)
+       eor     x17,x11,x17,ror#34      // Sigma0(a)
+       add     x22,x22,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x22,x22,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x9,x9                   // 6
+#endif
+       add     x22,x22,x17                     // h+=Sigma0(a)
+       ror     x16,x26,#14
+       add     x21,x21,x19                     // h+=K[i]
+       eor     x12,x26,x26,ror#23
+       and     x17,x27,x26
+       bic     x19,x20,x26
+       add     x21,x21,x9                      // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x22,x23                     // a^b, b^c in next round
+       eor     x16,x16,x12,ror#18      // Sigma1(e)
+       ror     x12,x22,#28
+       add     x21,x21,x17                     // h+=Ch(e,f,g)
+       eor     x17,x22,x22,ror#5
+       add     x21,x21,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x25,x25,x21                     // d+=h
+       eor     x28,x28,x23                     // Maj(a,b,c)
+       eor     x17,x12,x17,ror#34      // Sigma0(a)
+       add     x21,x21,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x21,x21,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x10,x10                 // 7
+#endif
+       ldp     x11,x12,[x1],#2*8
+       add     x21,x21,x17                     // h+=Sigma0(a)
+       ror     x16,x25,#14
+       add     x20,x20,x28                     // h+=K[i]
+       eor     x13,x25,x25,ror#23
+       and     x17,x26,x25
+       bic     x28,x27,x25
+       add     x20,x20,x10                     // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x21,x22                     // a^b, b^c in next round
+       eor     x16,x16,x13,ror#18      // Sigma1(e)
+       ror     x13,x21,#28
+       add     x20,x20,x17                     // h+=Ch(e,f,g)
+       eor     x17,x21,x21,ror#5
+       add     x20,x20,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x24,x24,x20                     // d+=h
+       eor     x19,x19,x22                     // Maj(a,b,c)
+       eor     x17,x13,x17,ror#34      // Sigma0(a)
+       add     x20,x20,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x20,x20,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x11,x11                 // 8
+#endif
+       add     x20,x20,x17                     // h+=Sigma0(a)
+       ror     x16,x24,#14
+       add     x27,x27,x19                     // h+=K[i]
+       eor     x14,x24,x24,ror#23
+       and     x17,x25,x24
+       bic     x19,x26,x24
+       add     x27,x27,x11                     // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x20,x21                     // a^b, b^c in next round
+       eor     x16,x16,x14,ror#18      // Sigma1(e)
+       ror     x14,x20,#28
+       add     x27,x27,x17                     // h+=Ch(e,f,g)
+       eor     x17,x20,x20,ror#5
+       add     x27,x27,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x23,x23,x27                     // d+=h
+       eor     x28,x28,x21                     // Maj(a,b,c)
+       eor     x17,x14,x17,ror#34      // Sigma0(a)
+       add     x27,x27,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x27,x27,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x12,x12                 // 9
+#endif
+       ldp     x13,x14,[x1],#2*8
+       add     x27,x27,x17                     // h+=Sigma0(a)
+       ror     x16,x23,#14
+       add     x26,x26,x28                     // h+=K[i]
+       eor     x15,x23,x23,ror#23
+       and     x17,x24,x23
+       bic     x28,x25,x23
+       add     x26,x26,x12                     // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x27,x20                     // a^b, b^c in next round
+       eor     x16,x16,x15,ror#18      // Sigma1(e)
+       ror     x15,x27,#28
+       add     x26,x26,x17                     // h+=Ch(e,f,g)
+       eor     x17,x27,x27,ror#5
+       add     x26,x26,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x22,x22,x26                     // d+=h
+       eor     x19,x19,x20                     // Maj(a,b,c)
+       eor     x17,x15,x17,ror#34      // Sigma0(a)
+       add     x26,x26,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x26,x26,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x13,x13                 // 10
+#endif
+       add     x26,x26,x17                     // h+=Sigma0(a)
+       ror     x16,x22,#14
+       add     x25,x25,x19                     // h+=K[i]
+       eor     x0,x22,x22,ror#23
+       and     x17,x23,x22
+       bic     x19,x24,x22
+       add     x25,x25,x13                     // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x26,x27                     // a^b, b^c in next round
+       eor     x16,x16,x0,ror#18       // Sigma1(e)
+       ror     x0,x26,#28
+       add     x25,x25,x17                     // h+=Ch(e,f,g)
+       eor     x17,x26,x26,ror#5
+       add     x25,x25,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x21,x21,x25                     // d+=h
+       eor     x28,x28,x27                     // Maj(a,b,c)
+       eor     x17,x0,x17,ror#34       // Sigma0(a)
+       add     x25,x25,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x25,x25,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x14,x14                 // 11
+#endif
+       ldp     x15,x0,[x1],#2*8
+       add     x25,x25,x17                     // h+=Sigma0(a)
+       str     x6,[sp,#24]
+       ror     x16,x21,#14
+       add     x24,x24,x28                     // h+=K[i]
+       eor     x6,x21,x21,ror#23
+       and     x17,x22,x21
+       bic     x28,x23,x21
+       add     x24,x24,x14                     // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x25,x26                     // a^b, b^c in next round
+       eor     x16,x16,x6,ror#18       // Sigma1(e)
+       ror     x6,x25,#28
+       add     x24,x24,x17                     // h+=Ch(e,f,g)
+       eor     x17,x25,x25,ror#5
+       add     x24,x24,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x20,x20,x24                     // d+=h
+       eor     x19,x19,x26                     // Maj(a,b,c)
+       eor     x17,x6,x17,ror#34       // Sigma0(a)
+       add     x24,x24,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x24,x24,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x15,x15                 // 12
+#endif
+       add     x24,x24,x17                     // h+=Sigma0(a)
+       str     x7,[sp,#0]
+       ror     x16,x20,#14
+       add     x23,x23,x19                     // h+=K[i]
+       eor     x7,x20,x20,ror#23
+       and     x17,x21,x20
+       bic     x19,x22,x20
+       add     x23,x23,x15                     // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x24,x25                     // a^b, b^c in next round
+       eor     x16,x16,x7,ror#18       // Sigma1(e)
+       ror     x7,x24,#28
+       add     x23,x23,x17                     // h+=Ch(e,f,g)
+       eor     x17,x24,x24,ror#5
+       add     x23,x23,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x27,x27,x23                     // d+=h
+       eor     x28,x28,x25                     // Maj(a,b,c)
+       eor     x17,x7,x17,ror#34       // Sigma0(a)
+       add     x23,x23,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x23,x23,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x0,x0                   // 13
+#endif
+       ldp     x1,x2,[x1]
+       add     x23,x23,x17                     // h+=Sigma0(a)
+       str     x8,[sp,#8]
+       ror     x16,x27,#14
+       add     x22,x22,x28                     // h+=K[i]
+       eor     x8,x27,x27,ror#23
+       and     x17,x20,x27
+       bic     x28,x21,x27
+       add     x22,x22,x0                      // h+=X[i]
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x23,x24                     // a^b, b^c in next round
+       eor     x16,x16,x8,ror#18       // Sigma1(e)
+       ror     x8,x23,#28
+       add     x22,x22,x17                     // h+=Ch(e,f,g)
+       eor     x17,x23,x23,ror#5
+       add     x22,x22,x16                     // h+=Sigma1(e)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       add     x26,x26,x22                     // d+=h
+       eor     x19,x19,x24                     // Maj(a,b,c)
+       eor     x17,x8,x17,ror#34       // Sigma0(a)
+       add     x22,x22,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       //add   x22,x22,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x1,x1                   // 14
+#endif
+       ldr     x6,[sp,#24]
+       add     x22,x22,x17                     // h+=Sigma0(a)
+       str     x9,[sp,#16]
+       ror     x16,x26,#14
+       add     x21,x21,x19                     // h+=K[i]
+       eor     x9,x26,x26,ror#23
+       and     x17,x27,x26
+       bic     x19,x20,x26
+       add     x21,x21,x1                      // h+=X[i]
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x22,x23                     // a^b, b^c in next round
+       eor     x16,x16,x9,ror#18       // Sigma1(e)
+       ror     x9,x22,#28
+       add     x21,x21,x17                     // h+=Ch(e,f,g)
+       eor     x17,x22,x22,ror#5
+       add     x21,x21,x16                     // h+=Sigma1(e)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       add     x25,x25,x21                     // d+=h
+       eor     x28,x28,x23                     // Maj(a,b,c)
+       eor     x17,x9,x17,ror#34       // Sigma0(a)
+       add     x21,x21,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       //add   x21,x21,x17                     // h+=Sigma0(a)
+#ifndef        __AARCH64EB__
+       rev     x2,x2                   // 15
+#endif
+       ldr     x7,[sp,#0]
+       add     x21,x21,x17                     // h+=Sigma0(a)
+       str     x10,[sp,#24]
+       ror     x16,x25,#14
+       add     x20,x20,x28                     // h+=K[i]
+       ror     x9,x4,#1
+       and     x17,x26,x25
+       ror     x8,x1,#19
+       bic     x28,x27,x25
+       ror     x10,x21,#28
+       add     x20,x20,x2                      // h+=X[i]
+       eor     x16,x16,x25,ror#18
+       eor     x9,x9,x4,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x21,x22                     // a^b, b^c in next round
+       eor     x16,x16,x25,ror#41      // Sigma1(e)
+       eor     x10,x10,x21,ror#34
+       add     x20,x20,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x8,x8,x1,ror#61
+       eor     x9,x9,x4,lsr#7  // sigma0(X[i+1])
+       add     x20,x20,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x22                     // Maj(a,b,c)
+       eor     x17,x10,x21,ror#39      // Sigma0(a)
+       eor     x8,x8,x1,lsr#6  // sigma1(X[i+14])
+       add     x3,x3,x12
+       add     x24,x24,x20                     // d+=h
+       add     x20,x20,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x3,x3,x9
+       add     x20,x20,x17                     // h+=Sigma0(a)
+       add     x3,x3,x8
+.Loop_16_xx:
+       ldr     x8,[sp,#8]
+       str     x11,[sp,#0]
+       ror     x16,x24,#14
+       add     x27,x27,x19                     // h+=K[i]
+       ror     x10,x5,#1
+       and     x17,x25,x24
+       ror     x9,x2,#19
+       bic     x19,x26,x24
+       ror     x11,x20,#28
+       add     x27,x27,x3                      // h+=X[i]
+       eor     x16,x16,x24,ror#18
+       eor     x10,x10,x5,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x20,x21                     // a^b, b^c in next round
+       eor     x16,x16,x24,ror#41      // Sigma1(e)
+       eor     x11,x11,x20,ror#34
+       add     x27,x27,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x9,x9,x2,ror#61
+       eor     x10,x10,x5,lsr#7        // sigma0(X[i+1])
+       add     x27,x27,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x21                     // Maj(a,b,c)
+       eor     x17,x11,x20,ror#39      // Sigma0(a)
+       eor     x9,x9,x2,lsr#6  // sigma1(X[i+14])
+       add     x4,x4,x13
+       add     x23,x23,x27                     // d+=h
+       add     x27,x27,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x4,x4,x10
+       add     x27,x27,x17                     // h+=Sigma0(a)
+       add     x4,x4,x9
+       ldr     x9,[sp,#16]
+       str     x12,[sp,#8]
+       ror     x16,x23,#14
+       add     x26,x26,x28                     // h+=K[i]
+       ror     x11,x6,#1
+       and     x17,x24,x23
+       ror     x10,x3,#19
+       bic     x28,x25,x23
+       ror     x12,x27,#28
+       add     x26,x26,x4                      // h+=X[i]
+       eor     x16,x16,x23,ror#18
+       eor     x11,x11,x6,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x27,x20                     // a^b, b^c in next round
+       eor     x16,x16,x23,ror#41      // Sigma1(e)
+       eor     x12,x12,x27,ror#34
+       add     x26,x26,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x10,x10,x3,ror#61
+       eor     x11,x11,x6,lsr#7        // sigma0(X[i+1])
+       add     x26,x26,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x20                     // Maj(a,b,c)
+       eor     x17,x12,x27,ror#39      // Sigma0(a)
+       eor     x10,x10,x3,lsr#6        // sigma1(X[i+14])
+       add     x5,x5,x14
+       add     x22,x22,x26                     // d+=h
+       add     x26,x26,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x5,x5,x11
+       add     x26,x26,x17                     // h+=Sigma0(a)
+       add     x5,x5,x10
+       ldr     x10,[sp,#24]
+       str     x13,[sp,#16]
+       ror     x16,x22,#14
+       add     x25,x25,x19                     // h+=K[i]
+       ror     x12,x7,#1
+       and     x17,x23,x22
+       ror     x11,x4,#19
+       bic     x19,x24,x22
+       ror     x13,x26,#28
+       add     x25,x25,x5                      // h+=X[i]
+       eor     x16,x16,x22,ror#18
+       eor     x12,x12,x7,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x26,x27                     // a^b, b^c in next round
+       eor     x16,x16,x22,ror#41      // Sigma1(e)
+       eor     x13,x13,x26,ror#34
+       add     x25,x25,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x11,x11,x4,ror#61
+       eor     x12,x12,x7,lsr#7        // sigma0(X[i+1])
+       add     x25,x25,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x27                     // Maj(a,b,c)
+       eor     x17,x13,x26,ror#39      // Sigma0(a)
+       eor     x11,x11,x4,lsr#6        // sigma1(X[i+14])
+       add     x6,x6,x15
+       add     x21,x21,x25                     // d+=h
+       add     x25,x25,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x6,x6,x12
+       add     x25,x25,x17                     // h+=Sigma0(a)
+       add     x6,x6,x11
+       ldr     x11,[sp,#0]
+       str     x14,[sp,#24]
+       ror     x16,x21,#14
+       add     x24,x24,x28                     // h+=K[i]
+       ror     x13,x8,#1
+       and     x17,x22,x21
+       ror     x12,x5,#19
+       bic     x28,x23,x21
+       ror     x14,x25,#28
+       add     x24,x24,x6                      // h+=X[i]
+       eor     x16,x16,x21,ror#18
+       eor     x13,x13,x8,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x25,x26                     // a^b, b^c in next round
+       eor     x16,x16,x21,ror#41      // Sigma1(e)
+       eor     x14,x14,x25,ror#34
+       add     x24,x24,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x12,x12,x5,ror#61
+       eor     x13,x13,x8,lsr#7        // sigma0(X[i+1])
+       add     x24,x24,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x26                     // Maj(a,b,c)
+       eor     x17,x14,x25,ror#39      // Sigma0(a)
+       eor     x12,x12,x5,lsr#6        // sigma1(X[i+14])
+       add     x7,x7,x0
+       add     x20,x20,x24                     // d+=h
+       add     x24,x24,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x7,x7,x13
+       add     x24,x24,x17                     // h+=Sigma0(a)
+       add     x7,x7,x12
+       ldr     x12,[sp,#8]
+       str     x15,[sp,#0]
+       ror     x16,x20,#14
+       add     x23,x23,x19                     // h+=K[i]
+       ror     x14,x9,#1
+       and     x17,x21,x20
+       ror     x13,x6,#19
+       bic     x19,x22,x20
+       ror     x15,x24,#28
+       add     x23,x23,x7                      // h+=X[i]
+       eor     x16,x16,x20,ror#18
+       eor     x14,x14,x9,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x24,x25                     // a^b, b^c in next round
+       eor     x16,x16,x20,ror#41      // Sigma1(e)
+       eor     x15,x15,x24,ror#34
+       add     x23,x23,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x13,x13,x6,ror#61
+       eor     x14,x14,x9,lsr#7        // sigma0(X[i+1])
+       add     x23,x23,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x25                     // Maj(a,b,c)
+       eor     x17,x15,x24,ror#39      // Sigma0(a)
+       eor     x13,x13,x6,lsr#6        // sigma1(X[i+14])
+       add     x8,x8,x1
+       add     x27,x27,x23                     // d+=h
+       add     x23,x23,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x8,x8,x14
+       add     x23,x23,x17                     // h+=Sigma0(a)
+       add     x8,x8,x13
+       ldr     x13,[sp,#16]
+       str     x0,[sp,#8]
+       ror     x16,x27,#14
+       add     x22,x22,x28                     // h+=K[i]
+       ror     x15,x10,#1
+       and     x17,x20,x27
+       ror     x14,x7,#19
+       bic     x28,x21,x27
+       ror     x0,x23,#28
+       add     x22,x22,x8                      // h+=X[i]
+       eor     x16,x16,x27,ror#18
+       eor     x15,x15,x10,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x23,x24                     // a^b, b^c in next round
+       eor     x16,x16,x27,ror#41      // Sigma1(e)
+       eor     x0,x0,x23,ror#34
+       add     x22,x22,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x14,x14,x7,ror#61
+       eor     x15,x15,x10,lsr#7       // sigma0(X[i+1])
+       add     x22,x22,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x24                     // Maj(a,b,c)
+       eor     x17,x0,x23,ror#39       // Sigma0(a)
+       eor     x14,x14,x7,lsr#6        // sigma1(X[i+14])
+       add     x9,x9,x2
+       add     x26,x26,x22                     // d+=h
+       add     x22,x22,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x9,x9,x15
+       add     x22,x22,x17                     // h+=Sigma0(a)
+       add     x9,x9,x14
+       ldr     x14,[sp,#24]
+       str     x1,[sp,#16]
+       ror     x16,x26,#14
+       add     x21,x21,x19                     // h+=K[i]
+       ror     x0,x11,#1
+       and     x17,x27,x26
+       ror     x15,x8,#19
+       bic     x19,x20,x26
+       ror     x1,x22,#28
+       add     x21,x21,x9                      // h+=X[i]
+       eor     x16,x16,x26,ror#18
+       eor     x0,x0,x11,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x22,x23                     // a^b, b^c in next round
+       eor     x16,x16,x26,ror#41      // Sigma1(e)
+       eor     x1,x1,x22,ror#34
+       add     x21,x21,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x15,x15,x8,ror#61
+       eor     x0,x0,x11,lsr#7 // sigma0(X[i+1])
+       add     x21,x21,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x23                     // Maj(a,b,c)
+       eor     x17,x1,x22,ror#39       // Sigma0(a)
+       eor     x15,x15,x8,lsr#6        // sigma1(X[i+14])
+       add     x10,x10,x3
+       add     x25,x25,x21                     // d+=h
+       add     x21,x21,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x10,x10,x0
+       add     x21,x21,x17                     // h+=Sigma0(a)
+       add     x10,x10,x15
+       ldr     x15,[sp,#0]
+       str     x2,[sp,#24]
+       ror     x16,x25,#14
+       add     x20,x20,x28                     // h+=K[i]
+       ror     x1,x12,#1
+       and     x17,x26,x25
+       ror     x0,x9,#19
+       bic     x28,x27,x25
+       ror     x2,x21,#28
+       add     x20,x20,x10                     // h+=X[i]
+       eor     x16,x16,x25,ror#18
+       eor     x1,x1,x12,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x21,x22                     // a^b, b^c in next round
+       eor     x16,x16,x25,ror#41      // Sigma1(e)
+       eor     x2,x2,x21,ror#34
+       add     x20,x20,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x0,x0,x9,ror#61
+       eor     x1,x1,x12,lsr#7 // sigma0(X[i+1])
+       add     x20,x20,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x22                     // Maj(a,b,c)
+       eor     x17,x2,x21,ror#39       // Sigma0(a)
+       eor     x0,x0,x9,lsr#6  // sigma1(X[i+14])
+       add     x11,x11,x4
+       add     x24,x24,x20                     // d+=h
+       add     x20,x20,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x11,x11,x1
+       add     x20,x20,x17                     // h+=Sigma0(a)
+       add     x11,x11,x0
+       ldr     x0,[sp,#8]
+       str     x3,[sp,#0]
+       ror     x16,x24,#14
+       add     x27,x27,x19                     // h+=K[i]
+       ror     x2,x13,#1
+       and     x17,x25,x24
+       ror     x1,x10,#19
+       bic     x19,x26,x24
+       ror     x3,x20,#28
+       add     x27,x27,x11                     // h+=X[i]
+       eor     x16,x16,x24,ror#18
+       eor     x2,x2,x13,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x20,x21                     // a^b, b^c in next round
+       eor     x16,x16,x24,ror#41      // Sigma1(e)
+       eor     x3,x3,x20,ror#34
+       add     x27,x27,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x1,x1,x10,ror#61
+       eor     x2,x2,x13,lsr#7 // sigma0(X[i+1])
+       add     x27,x27,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x21                     // Maj(a,b,c)
+       eor     x17,x3,x20,ror#39       // Sigma0(a)
+       eor     x1,x1,x10,lsr#6 // sigma1(X[i+14])
+       add     x12,x12,x5
+       add     x23,x23,x27                     // d+=h
+       add     x27,x27,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x12,x12,x2
+       add     x27,x27,x17                     // h+=Sigma0(a)
+       add     x12,x12,x1
+       ldr     x1,[sp,#16]
+       str     x4,[sp,#8]
+       ror     x16,x23,#14
+       add     x26,x26,x28                     // h+=K[i]
+       ror     x3,x14,#1
+       and     x17,x24,x23
+       ror     x2,x11,#19
+       bic     x28,x25,x23
+       ror     x4,x27,#28
+       add     x26,x26,x12                     // h+=X[i]
+       eor     x16,x16,x23,ror#18
+       eor     x3,x3,x14,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x27,x20                     // a^b, b^c in next round
+       eor     x16,x16,x23,ror#41      // Sigma1(e)
+       eor     x4,x4,x27,ror#34
+       add     x26,x26,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x2,x2,x11,ror#61
+       eor     x3,x3,x14,lsr#7 // sigma0(X[i+1])
+       add     x26,x26,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x20                     // Maj(a,b,c)
+       eor     x17,x4,x27,ror#39       // Sigma0(a)
+       eor     x2,x2,x11,lsr#6 // sigma1(X[i+14])
+       add     x13,x13,x6
+       add     x22,x22,x26                     // d+=h
+       add     x26,x26,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x13,x13,x3
+       add     x26,x26,x17                     // h+=Sigma0(a)
+       add     x13,x13,x2
+       ldr     x2,[sp,#24]
+       str     x5,[sp,#16]
+       ror     x16,x22,#14
+       add     x25,x25,x19                     // h+=K[i]
+       ror     x4,x15,#1
+       and     x17,x23,x22
+       ror     x3,x12,#19
+       bic     x19,x24,x22
+       ror     x5,x26,#28
+       add     x25,x25,x13                     // h+=X[i]
+       eor     x16,x16,x22,ror#18
+       eor     x4,x4,x15,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x26,x27                     // a^b, b^c in next round
+       eor     x16,x16,x22,ror#41      // Sigma1(e)
+       eor     x5,x5,x26,ror#34
+       add     x25,x25,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x3,x3,x12,ror#61
+       eor     x4,x4,x15,lsr#7 // sigma0(X[i+1])
+       add     x25,x25,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x27                     // Maj(a,b,c)
+       eor     x17,x5,x26,ror#39       // Sigma0(a)
+       eor     x3,x3,x12,lsr#6 // sigma1(X[i+14])
+       add     x14,x14,x7
+       add     x21,x21,x25                     // d+=h
+       add     x25,x25,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x14,x14,x4
+       add     x25,x25,x17                     // h+=Sigma0(a)
+       add     x14,x14,x3
+       ldr     x3,[sp,#0]
+       str     x6,[sp,#24]
+       ror     x16,x21,#14
+       add     x24,x24,x28                     // h+=K[i]
+       ror     x5,x0,#1
+       and     x17,x22,x21
+       ror     x4,x13,#19
+       bic     x28,x23,x21
+       ror     x6,x25,#28
+       add     x24,x24,x14                     // h+=X[i]
+       eor     x16,x16,x21,ror#18
+       eor     x5,x5,x0,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x25,x26                     // a^b, b^c in next round
+       eor     x16,x16,x21,ror#41      // Sigma1(e)
+       eor     x6,x6,x25,ror#34
+       add     x24,x24,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x4,x4,x13,ror#61
+       eor     x5,x5,x0,lsr#7  // sigma0(X[i+1])
+       add     x24,x24,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x26                     // Maj(a,b,c)
+       eor     x17,x6,x25,ror#39       // Sigma0(a)
+       eor     x4,x4,x13,lsr#6 // sigma1(X[i+14])
+       add     x15,x15,x8
+       add     x20,x20,x24                     // d+=h
+       add     x24,x24,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x15,x15,x5
+       add     x24,x24,x17                     // h+=Sigma0(a)
+       add     x15,x15,x4
+       ldr     x4,[sp,#8]
+       str     x7,[sp,#0]
+       ror     x16,x20,#14
+       add     x23,x23,x19                     // h+=K[i]
+       ror     x6,x1,#1
+       and     x17,x21,x20
+       ror     x5,x14,#19
+       bic     x19,x22,x20
+       ror     x7,x24,#28
+       add     x23,x23,x15                     // h+=X[i]
+       eor     x16,x16,x20,ror#18
+       eor     x6,x6,x1,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x24,x25                     // a^b, b^c in next round
+       eor     x16,x16,x20,ror#41      // Sigma1(e)
+       eor     x7,x7,x24,ror#34
+       add     x23,x23,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x5,x5,x14,ror#61
+       eor     x6,x6,x1,lsr#7  // sigma0(X[i+1])
+       add     x23,x23,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x25                     // Maj(a,b,c)
+       eor     x17,x7,x24,ror#39       // Sigma0(a)
+       eor     x5,x5,x14,lsr#6 // sigma1(X[i+14])
+       add     x0,x0,x9
+       add     x27,x27,x23                     // d+=h
+       add     x23,x23,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x0,x0,x6
+       add     x23,x23,x17                     // h+=Sigma0(a)
+       add     x0,x0,x5
+       ldr     x5,[sp,#16]
+       str     x8,[sp,#8]
+       ror     x16,x27,#14
+       add     x22,x22,x28                     // h+=K[i]
+       ror     x7,x2,#1
+       and     x17,x20,x27
+       ror     x6,x15,#19
+       bic     x28,x21,x27
+       ror     x8,x23,#28
+       add     x22,x22,x0                      // h+=X[i]
+       eor     x16,x16,x27,ror#18
+       eor     x7,x7,x2,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x23,x24                     // a^b, b^c in next round
+       eor     x16,x16,x27,ror#41      // Sigma1(e)
+       eor     x8,x8,x23,ror#34
+       add     x22,x22,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x6,x6,x15,ror#61
+       eor     x7,x7,x2,lsr#7  // sigma0(X[i+1])
+       add     x22,x22,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x24                     // Maj(a,b,c)
+       eor     x17,x8,x23,ror#39       // Sigma0(a)
+       eor     x6,x6,x15,lsr#6 // sigma1(X[i+14])
+       add     x1,x1,x10
+       add     x26,x26,x22                     // d+=h
+       add     x22,x22,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x1,x1,x7
+       add     x22,x22,x17                     // h+=Sigma0(a)
+       add     x1,x1,x6
+       ldr     x6,[sp,#24]
+       str     x9,[sp,#16]
+       ror     x16,x26,#14
+       add     x21,x21,x19                     // h+=K[i]
+       ror     x8,x3,#1
+       and     x17,x27,x26
+       ror     x7,x0,#19
+       bic     x19,x20,x26
+       ror     x9,x22,#28
+       add     x21,x21,x1                      // h+=X[i]
+       eor     x16,x16,x26,ror#18
+       eor     x8,x8,x3,ror#8
+       orr     x17,x17,x19                     // Ch(e,f,g)
+       eor     x19,x22,x23                     // a^b, b^c in next round
+       eor     x16,x16,x26,ror#41      // Sigma1(e)
+       eor     x9,x9,x22,ror#34
+       add     x21,x21,x17                     // h+=Ch(e,f,g)
+       and     x28,x28,x19                     // (b^c)&=(a^b)
+       eor     x7,x7,x0,ror#61
+       eor     x8,x8,x3,lsr#7  // sigma0(X[i+1])
+       add     x21,x21,x16                     // h+=Sigma1(e)
+       eor     x28,x28,x23                     // Maj(a,b,c)
+       eor     x17,x9,x22,ror#39       // Sigma0(a)
+       eor     x7,x7,x0,lsr#6  // sigma1(X[i+14])
+       add     x2,x2,x11
+       add     x25,x25,x21                     // d+=h
+       add     x21,x21,x28                     // h+=Maj(a,b,c)
+       ldr     x28,[x30],#8            // *K++, x19 in next round
+       add     x2,x2,x8
+       add     x21,x21,x17                     // h+=Sigma0(a)
+       add     x2,x2,x7
+       ldr     x7,[sp,#0]
+       str     x10,[sp,#24]
+       ror     x16,x25,#14
+       add     x20,x20,x28                     // h+=K[i]
+       ror     x9,x4,#1
+       and     x17,x26,x25
+       ror     x8,x1,#19
+       bic     x28,x27,x25
+       ror     x10,x21,#28
+       add     x20,x20,x2                      // h+=X[i]
+       eor     x16,x16,x25,ror#18
+       eor     x9,x9,x4,ror#8
+       orr     x17,x17,x28                     // Ch(e,f,g)
+       eor     x28,x21,x22                     // a^b, b^c in next round
+       eor     x16,x16,x25,ror#41      // Sigma1(e)
+       eor     x10,x10,x21,ror#34
+       add     x20,x20,x17                     // h+=Ch(e,f,g)
+       and     x19,x19,x28                     // (b^c)&=(a^b)
+       eor     x8,x8,x1,ror#61
+       eor     x9,x9,x4,lsr#7  // sigma0(X[i+1])
+       add     x20,x20,x16                     // h+=Sigma1(e)
+       eor     x19,x19,x22                     // Maj(a,b,c)
+       eor     x17,x10,x21,ror#39      // Sigma0(a)
+       eor     x8,x8,x1,lsr#6  // sigma1(X[i+14])
+       add     x3,x3,x12
+       add     x24,x24,x20                     // d+=h
+       add     x20,x20,x19                     // h+=Maj(a,b,c)
+       ldr     x19,[x30],#8            // *K++, x28 in next round
+       add     x3,x3,x9
+       add     x20,x20,x17                     // h+=Sigma0(a)
+       add     x3,x3,x8
+       cbnz    x19,.Loop_16_xx
+
+       ldp     x0,x2,[x29,#96]
+       ldr     x1,[x29,#112]
+       sub     x30,x30,#648            // rewind
+
+       ldp     x3,x4,[x0]
+       ldp     x5,x6,[x0,#2*8]
+       add     x1,x1,#14*8                     // advance input pointer
+       ldp     x7,x8,[x0,#4*8]
+       add     x20,x20,x3
+       ldp     x9,x10,[x0,#6*8]
+       add     x21,x21,x4
+       add     x22,x22,x5
+       add     x23,x23,x6
+       stp     x20,x21,[x0]
+       add     x24,x24,x7
+       add     x25,x25,x8
+       stp     x22,x23,[x0,#2*8]
+       add     x26,x26,x9
+       add     x27,x27,x10
+       cmp     x1,x2
+       stp     x24,x25,[x0,#4*8]
+       stp     x26,x27,[x0,#6*8]
+       b.ne    .Loop
+
+       ldp     x19,x20,[x29,#16]
+       add     sp,sp,#4*8
+       ldp     x21,x22,[x29,#32]
+       ldp     x23,x24,[x29,#48]
+       ldp     x25,x26,[x29,#64]
+       ldp     x27,x28,[x29,#80]
+       ldp     x29,x30,[sp],#128
+       ret
+.size  sha512_block_data_order,.-sha512_block_data_order
+
+.align 6
+.type  .LK512,%object
+.LK512:
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+       .quad   0       // terminator
+.size  .LK512,.-.LK512
+#ifndef        __KERNEL__
+.align 3
+.LOPENSSL_armcap_P:
+# ifdef        __ILP32__
+       .long   OPENSSL_armcap_P-.
+# else
+       .quad   OPENSSL_armcap_P-.
+# endif
+#endif
+.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#ifndef        __KERNEL__
+.comm  OPENSSL_armcap_P,4,4
+#endif
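For readers tracing the scheduled rounds above: the ror/lsr combinations are the four standard SHA-512 Sigma/sigma functions from FIPS 180-4, with the rotate amounts (28/34/39, 14/18/41, 1/8 plus >>7, 19/61 plus >>6) visible directly in the instruction comments. A reference-only C rendering, not part of the patch:

#include <stdint.h>

static inline uint64_t ror64(uint64_t x, unsigned int n)
{
        return (x >> n) | (x << (64 - n));
}

/* compression-function mixing of a and e */
static inline uint64_t Sigma0(uint64_t a) { return ror64(a, 28) ^ ror64(a, 34) ^ ror64(a, 39); }
static inline uint64_t Sigma1(uint64_t e) { return ror64(e, 14) ^ ror64(e, 18) ^ ror64(e, 41); }

/* message-schedule expansion of X[i+1] and X[i+14] */
static inline uint64_t sigma0(uint64_t x) { return ror64(x, 1)  ^ ror64(x, 8)  ^ (x >> 7); }
static inline uint64_t sigma1(uint64_t x) { return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); }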
diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c
new file mode 100644
index 0000000..aff35c9
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Linux/arm64 port of the OpenSSL SHA512 implementation for AArch64
+ *
+ * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <asm/neon.h>
+
+MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash for arm64");
+MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
+
+asmlinkage void sha512_block_data_order(u32 *digest, const void *data,
+                                       unsigned int num_blks);
+
+static int sha512_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int len)
+{
+       return sha512_base_do_update(desc, data, len,
+                       (sha512_block_fn *)sha512_block_data_order);
+}
+
+static int sha512_finup(struct shash_desc *desc, const u8 *data,
+                       unsigned int len, u8 *out)
+{
+       if (len)
+               sha512_base_do_update(desc, data, len,
+                       (sha512_block_fn *)sha512_block_data_order);
+       sha512_base_do_finalize(desc,
+                       (sha512_block_fn *)sha512_block_data_order);
+
+       return sha512_base_finish(desc, out);
+}
+
+static int sha512_final(struct shash_desc *desc, u8 *out)
+{
+       return sha512_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+       .digestsize             = SHA512_DIGEST_SIZE,
+       .init                   = sha512_base_init,
+       .update                 = sha512_update,
+       .final                  = sha512_final,
+       .finup                  = sha512_finup,
+       .descsize               = sizeof(struct sha512_state),
+       .base.cra_name          = "sha512",
+       .base.cra_driver_name   = "sha512-arm64",
+       .base.cra_priority      = 150,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = SHA512_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+}, {
+       .digestsize             = SHA384_DIGEST_SIZE,
+       .init                   = sha384_base_init,
+       .update                 = sha512_update,
+       .final                  = sha512_final,
+       .finup                  = sha512_finup,
+       .descsize               = sizeof(struct sha512_state),
+       .base.cra_name          = "sha384",
+       .base.cra_driver_name   = "sha384-arm64",
+       .base.cra_priority      = 150,
+       .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+       .base.cra_blocksize     = SHA384_BLOCK_SIZE,
+       .base.cra_module        = THIS_MODULE,
+} };
+
+static int __init sha512_mod_init(void)
+{
+       return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha512_mod_fini(void)
+{
+       crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha512_mod_init);
+module_exit(sha512_mod_fini);
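Once the module is loaded, both hashes are reachable through the generic synchronous hash (shash) API; nothing driver specific is exported. A minimal, hypothetical caller (the demo_sha512() name and the surrounding error handling are illustrative; the crypto calls are the stock shash interface of this kernel generation):

#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/err.h>

static int demo_sha512(const u8 *data, unsigned int len,
                       u8 out[SHA512_DIGEST_SIZE])
{
        struct crypto_shash *tfm;
        int err;

        tfm = crypto_alloc_shash("sha512", 0, 0);  /* highest-priority provider wins */
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        {
                SHASH_DESC_ON_STACK(desc, tfm);

                desc->tfm = tfm;
                desc->flags = 0;
                err = crypto_shash_digest(desc, data, len, out);
        }

        crypto_free_shash(tfm);
        return err;
}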
index 7998c17..87f4045 100644
@@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
 obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
-obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o
+obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
index aa8b067..31c34ee 100644
@@ -21,7 +21,6 @@
 
 #include <linux/hardirq.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/err.h>
 #include <crypto/algapi.h>
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
 #include <crypto/b128ops.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 #include <asm/crypto/aes.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #ifdef CONFIG_X86_64
 
 
 #define AESNI_ALIGN    16
+#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
 #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
+#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
+#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
 
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
  */
 struct aesni_rfc4106_gcm_ctx {
-       u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
-       struct crypto_aes_ctx aes_key_expanded
-               __attribute__ ((__aligned__(AESNI_ALIGN)));
+       u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+       struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
        u8 nonce[4];
 };
 
-struct aesni_lrw_ctx {
-       struct lrw_table_ctx lrw_table;
-       u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-};
-
 struct aesni_xts_ctx {
-       u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-       u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+       u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
+       u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 };
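The new alignment macros exist because the crypto core only guarantees CRYPTO_MINALIGN for a tfm context while the AES-NI routines want 16-byte alignment: the context size is padded by AESNI_ALIGN_EXTRA and the pointer is rounded up at run time (the driver does this in its aes_ctx() helper, which is not part of this hunk). A sketch of that rounding, for illustration only and relying on the definitions above:

/* Illustration: round a CRYPTO_MINALIGN-aligned context pointer up to
 * AESNI_ALIGN; the AESNI_ALIGN_EXTRA padding guarantees the aligned
 * pointer still lies inside the allocation. */
static inline struct crypto_aes_ctx *aligned_aes_ctx(void *raw_ctx)
{
        return PTR_ALIGN((struct crypto_aes_ctx *)raw_ctx, AESNI_ALIGN);
}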
 
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -360,96 +357,95 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
        aesni_dec(ctx, dst, src);
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc,
-                      struct scatterlist *dst, struct scatterlist *src,
-                      unsigned int nbytes)
+static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                                unsigned int len)
+{
+       return aes_set_key_common(crypto_skcipher_tfm(tfm),
+                                 crypto_skcipher_ctx(tfm), key, len);
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_fpu_begin();
        while ((nbytes = walk.nbytes)) {
                aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                              nbytes & AES_BLOCK_MASK);
                nbytes &= AES_BLOCK_SIZE - 1;
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
        kernel_fpu_end();
 
        return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc,
-                      struct scatterlist *dst, struct scatterlist *src,
-                      unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_fpu_begin();
        while ((nbytes = walk.nbytes)) {
                aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                              nbytes & AES_BLOCK_MASK);
                nbytes &= AES_BLOCK_SIZE - 1;
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
        kernel_fpu_end();
 
        return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc,
-                      struct scatterlist *dst, struct scatterlist *src,
-                      unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_fpu_begin();
        while ((nbytes = walk.nbytes)) {
                aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                              nbytes & AES_BLOCK_MASK, walk.iv);
                nbytes &= AES_BLOCK_SIZE - 1;
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
        kernel_fpu_end();
 
        return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc,
-                      struct scatterlist *dst, struct scatterlist *src,
-                      unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_fpu_begin();
        while ((nbytes = walk.nbytes)) {
                aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                              nbytes & AES_BLOCK_MASK, walk.iv);
                nbytes &= AES_BLOCK_SIZE - 1;
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
        kernel_fpu_end();
 
@@ -458,7 +454,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
 
 #ifdef CONFIG_X86_64
 static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
-                           struct blkcipher_walk *walk)
+                           struct skcipher_walk *walk)
 {
        u8 *ctrblk = walk->iv;
        u8 keystream[AES_BLOCK_SIZE];
@@ -491,157 +487,53 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
 }
 #endif
 
-static int ctr_crypt(struct blkcipher_desc *desc,
-                    struct scatterlist *dst, struct scatterlist *src,
-                    unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-       struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-       struct blkcipher_walk walk;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       err = skcipher_walk_virt(&walk, req, true);
 
        kernel_fpu_begin();
        while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
                aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
                                      nbytes & AES_BLOCK_MASK, walk.iv);
                nbytes &= AES_BLOCK_SIZE - 1;
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
        if (walk.nbytes) {
                ctr_crypt_final(ctx, &walk);
-               err = blkcipher_walk_done(desc, &walk, 0);
+               err = skcipher_walk_done(&walk, 0);
        }
        kernel_fpu_end();
 
        return err;
 }
-#endif
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
-       return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
-}
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
-       return ablk_init_common(tfm, "__driver-cbc-aes-aesni");
-}
-
-#ifdef CONFIG_X86_64
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
-       return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
-}
-
-#endif
-
-#if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-static int ablk_pcbc_init(struct crypto_tfm *tfm)
-{
-       return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))");
-}
-#endif
-
-static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
-       aesni_ecb_enc(ctx, blks, blks, nbytes);
-}
 
-static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
-       aesni_ecb_dec(ctx, blks, blks, nbytes);
-}
-
-static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
                            unsigned int keylen)
 {
-       struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
        int err;
 
-       err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
-                                keylen - AES_BLOCK_SIZE);
+       err = xts_verify_key(tfm, key, keylen);
        if (err)
                return err;
 
-       return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
-}
-
-static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
-{
-       struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       lrw_free_table(&ctx->lrw_table);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
-{
-       struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       be128 buf[8];
-       struct lrw_crypt_req req = {
-               .tbuf = buf,
-               .tbuflen = sizeof(buf),
-
-               .table_ctx = &ctx->lrw_table,
-               .crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
-               .crypt_fn = lrw_xts_encrypt_callback,
-       };
-       int ret;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-       kernel_fpu_begin();
-       ret = lrw_crypt(desc, dst, src, nbytes, &req);
-       kernel_fpu_end();
-
-       return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
-{
-       struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       be128 buf[8];
-       struct lrw_crypt_req req = {
-               .tbuf = buf,
-               .tbuflen = sizeof(buf),
-
-               .table_ctx = &ctx->lrw_table,
-               .crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
-               .crypt_fn = lrw_xts_decrypt_callback,
-       };
-       int ret;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-       kernel_fpu_begin();
-       ret = lrw_crypt(desc, dst, src, nbytes, &req);
-       kernel_fpu_end();
-
-       return ret;
-}
-
-static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
-                           unsigned int keylen)
-{
-       struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-       int err;
-
-       err = xts_check_key(tfm, key, keylen);
-       if (err)
-               return err;
+       keylen /= 2;
 
        /* first half of xts-key is for crypt */
-       err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+       err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
+                                key, keylen);
        if (err)
                return err;
 
        /* second half of xts-key is for tweak */
-       return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
-                                 keylen / 2);
+       return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
+                                 key + keylen, keylen);
 }
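xts_verify_key() is the skcipher-flavoured successor of the xts_check_key() call used by the old code path. The checks it performs boil down to the following sketch (helper name illustrative; the real helper additionally sets the tfm result flags, see include/crypto/xts.h):

#include <crypto/algapi.h>      /* crypto_memneq() */
#include <linux/errno.h>
#include <linux/fips.h>
#include <linux/types.h>

static int demo_xts_key_ok(const u8 *key, unsigned int keylen)
{
        /* the combined key must split into two equal halves */
        if (keylen % 2)
                return -EINVAL;

        /* in FIPS mode the data key and the tweak key must differ */
        if (fips_enabled && !crypto_memneq(key, key + keylen / 2, keylen / 2))
                return -EINVAL;

        return 0;
}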
 
 
@@ -650,8 +542,6 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
        aesni_enc(ctx, out, in);
 }
 
-#ifdef CONFIG_X86_64
-
 static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
        glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
@@ -698,83 +588,28 @@ static const struct common_glue_ctx aesni_dec_xts = {
        } }
 };
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-       struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes,
-                                    XTS_TWEAK_CAST(aesni_xts_tweak),
-                                    aes_ctx(ctx->raw_tweak_ctx),
-                                    aes_ctx(ctx->raw_crypt_ctx));
+       return glue_xts_req_128bit(&aesni_enc_xts, req,
+                                  XTS_TWEAK_CAST(aesni_xts_tweak),
+                                  aes_ctx(ctx->raw_tweak_ctx),
+                                  aes_ctx(ctx->raw_crypt_ctx));
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-       struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-       return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes,
-                                    XTS_TWEAK_CAST(aesni_xts_tweak),
-                                    aes_ctx(ctx->raw_tweak_ctx),
-                                    aes_ctx(ctx->raw_crypt_ctx));
-}
-
-#else
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
-{
-       struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       be128 buf[8];
-       struct xts_crypt_req req = {
-               .tbuf = buf,
-               .tbuflen = sizeof(buf),
-
-               .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-               .tweak_fn = aesni_xts_tweak,
-               .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
-               .crypt_fn = lrw_xts_encrypt_callback,
-       };
-       int ret;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-       kernel_fpu_begin();
-       ret = xts_crypt(desc, dst, src, nbytes, &req);
-       kernel_fpu_end();
-
-       return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
-{
-       struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       be128 buf[8];
-       struct xts_crypt_req req = {
-               .tbuf = buf,
-               .tbuflen = sizeof(buf),
-
-               .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-               .tweak_fn = aesni_xts_tweak,
-               .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
-               .crypt_fn = lrw_xts_decrypt_callback,
-       };
-       int ret;
-
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-       kernel_fpu_begin();
-       ret = xts_crypt(desc, dst, src, nbytes, &req);
-       kernel_fpu_end();
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       return ret;
+       return glue_xts_req_128bit(&aesni_dec_xts, req,
+                                  XTS_TWEAK_CAST(aesni_xts_tweak),
+                                  aes_ctx(ctx->raw_tweak_ctx),
+                                  aes_ctx(ctx->raw_crypt_ctx));
 }
 
-#endif
-
-#ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_aead *aead)
 {
        struct cryptd_aead *cryptd_tfm;
@@ -1077,9 +912,7 @@ static struct crypto_alg aesni_algs[] = { {
        .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_CIPHER,
        .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx) +
-                                 AESNI_ALIGN - 1,
-       .cra_alignmask          = 0,
+       .cra_ctxsize            = CRYPTO_AES_CTX_SIZE,
        .cra_module             = THIS_MODULE,
        .cra_u  = {
                .cipher = {
@@ -1091,14 +924,12 @@ static struct crypto_alg aesni_algs[] = { {
                }
        }
 }, {
-       .cra_name               = "__aes-aesni",
-       .cra_driver_name        = "__driver-aes-aesni",
-       .cra_priority           = 0,
+       .cra_name               = "__aes",
+       .cra_driver_name        = "__aes-aesni",
+       .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
        .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx) +
-                                 AESNI_ALIGN - 1,
-       .cra_alignmask          = 0,
+       .cra_ctxsize            = CRYPTO_AES_CTX_SIZE,
        .cra_module             = THIS_MODULE,
        .cra_u  = {
                .cipher = {
@@ -1109,250 +940,94 @@ static struct crypto_alg aesni_algs[] = { {
                        .cia_decrypt            = __aes_decrypt
                }
        }
-}, {
-       .cra_name               = "__ecb-aes-aesni",
-       .cra_driver_name        = "__driver-ecb-aes-aesni",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx) +
-                                 AESNI_ALIGN - 1,
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .setkey         = aes_set_key,
-                       .encrypt        = ecb_encrypt,
-                       .decrypt        = ecb_decrypt,
-               },
-       },
-}, {
-       .cra_name               = "__cbc-aes-aesni",
-       .cra_driver_name        = "__driver-cbc-aes-aesni",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx) +
-                                 AESNI_ALIGN - 1,
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .setkey         = aes_set_key,
-                       .encrypt        = cbc_encrypt,
-                       .decrypt        = cbc_decrypt,
-               },
-       },
-}, {
-       .cra_name               = "ecb(aes)",
-       .cra_driver_name        = "ecb-aes-aesni",
-       .cra_priority           = 400,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_ecb_init,
-       .cra_exit               = ablk_exit,
-       .cra_u = {
-               .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .setkey         = ablk_set_key,
-                       .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_decrypt,
+} };
+
+static struct skcipher_alg aesni_skciphers[] = {
+       {
+               .base = {
+                       .cra_name               = "__ecb(aes)",
+                       .cra_driver_name        = "__ecb-aes-aesni",
+                       .cra_priority           = 400,
+                       .cra_flags              = CRYPTO_ALG_INTERNAL,
+                       .cra_blocksize          = AES_BLOCK_SIZE,
+                       .cra_ctxsize            = CRYPTO_AES_CTX_SIZE,
+                       .cra_module             = THIS_MODULE,
                },
-       },
-}, {
-       .cra_name               = "cbc(aes)",
-       .cra_driver_name        = "cbc-aes-aesni",
-       .cra_priority           = 400,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_cbc_init,
-       .cra_exit               = ablk_exit,
-       .cra_u = {
-               .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = ablk_set_key,
-                       .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_decrypt,
+               .min_keysize    = AES_MIN_KEY_SIZE,
+               .max_keysize    = AES_MAX_KEY_SIZE,
+               .setkey         = aesni_skcipher_setkey,
+               .encrypt        = ecb_encrypt,
+               .decrypt        = ecb_decrypt,
+       }, {
+               .base = {
+                       .cra_name               = "__cbc(aes)",
+                       .cra_driver_name        = "__cbc-aes-aesni",
+                       .cra_priority           = 400,
+                       .cra_flags              = CRYPTO_ALG_INTERNAL,
+                       .cra_blocksize          = AES_BLOCK_SIZE,
+                       .cra_ctxsize            = CRYPTO_AES_CTX_SIZE,
+                       .cra_module             = THIS_MODULE,
                },
-       },
+               .min_keysize    = AES_MIN_KEY_SIZE,
+               .max_keysize    = AES_MAX_KEY_SIZE,
+               .ivsize         = AES_BLOCK_SIZE,
+               .setkey         = aesni_skcipher_setkey,
+               .encrypt        = cbc_encrypt,
+               .decrypt        = cbc_decrypt,
 #ifdef CONFIG_X86_64
-}, {
-       .cra_name               = "__ctr-aes-aesni",
-       .cra_driver_name        = "__driver-ctr-aes-aesni",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx) +
-                                 AESNI_ALIGN - 1,
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = aes_set_key,
-                       .encrypt        = ctr_crypt,
-                       .decrypt        = ctr_crypt,
+       }, {
+               .base = {
+                       .cra_name               = "__ctr(aes)",
+                       .cra_driver_name        = "__ctr-aes-aesni",
+                       .cra_priority           = 400,
+                       .cra_flags              = CRYPTO_ALG_INTERNAL,
+                       .cra_blocksize          = 1,
+                       .cra_ctxsize            = CRYPTO_AES_CTX_SIZE,
+                       .cra_module             = THIS_MODULE,
                },
-       },
-}, {
-       .cra_name               = "ctr(aes)",
-       .cra_driver_name        = "ctr-aes-aesni",
-       .cra_priority           = 400,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_ctr_init,
-       .cra_exit               = ablk_exit,
-       .cra_u = {
-               .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = ablk_set_key,
-                       .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_encrypt,
-                       .geniv          = "chainiv",
+               .min_keysize    = AES_MIN_KEY_SIZE,
+               .max_keysize    = AES_MAX_KEY_SIZE,
+               .ivsize         = AES_BLOCK_SIZE,
+               .chunksize      = AES_BLOCK_SIZE,
+               .setkey         = aesni_skcipher_setkey,
+               .encrypt        = ctr_crypt,
+               .decrypt        = ctr_crypt,
+       }, {
+               .base = {
+                       .cra_name               = "__xts(aes)",
+                       .cra_driver_name        = "__xts-aes-aesni",
+                       .cra_priority           = 401,
+                       .cra_flags              = CRYPTO_ALG_INTERNAL,
+                       .cra_blocksize          = AES_BLOCK_SIZE,
+                       .cra_ctxsize            = XTS_AES_CTX_SIZE,
+                       .cra_module             = THIS_MODULE,
                },
-       },
+               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+               .ivsize         = AES_BLOCK_SIZE,
+               .setkey         = xts_aesni_setkey,
+               .encrypt        = xts_encrypt,
+               .decrypt        = xts_decrypt,
 #endif
+       }
+};
+
+struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
+
+struct {
+       const char *algname;
+       const char *drvname;
+       const char *basename;
+       struct simd_skcipher_alg *simd;
+} aesni_simd_skciphers2[] = {
 #if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-}, {
-       .cra_name               = "pcbc(aes)",
-       .cra_driver_name        = "pcbc-aes-aesni",
-       .cra_priority           = 400,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_pcbc_init,
-       .cra_exit               = ablk_exit,
-       .cra_u = {
-               .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = ablk_set_key,
-                       .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_decrypt,
-               },
+       {
+               .algname        = "pcbc(aes)",
+               .drvname        = "pcbc-aes-aesni",
+               .basename       = "fpu(pcbc(__aes-aesni))",
        },
 #endif
-}, {
-       .cra_name               = "__lrw-aes-aesni",
-       .cra_driver_name        = "__driver-lrw-aes-aesni",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct aesni_lrw_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_exit               = lrw_aesni_exit_tfm,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = lrw_aesni_setkey,
-                       .encrypt        = lrw_encrypt,
-                       .decrypt        = lrw_decrypt,
-               },
-       },
-}, {
-       .cra_name               = "__xts-aes-aesni",
-       .cra_driver_name        = "__driver-xts-aes-aesni",
-       .cra_priority           = 0,
-       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
-                                 CRYPTO_ALG_INTERNAL,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct aesni_xts_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_blkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_u = {
-               .blkcipher = {
-                       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-                       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = xts_aesni_setkey,
-                       .encrypt        = xts_encrypt,
-                       .decrypt        = xts_decrypt,
-               },
-       },
-}, {
-       .cra_name               = "lrw(aes)",
-       .cra_driver_name        = "lrw-aes-aesni",
-       .cra_priority           = 400,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_u = {
-               .ablkcipher = {
-                       .min_keysize    = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-                       .max_keysize    = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = ablk_set_key,
-                       .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_decrypt,
-               },
-       },
-}, {
-       .cra_name               = "xts(aes)",
-       .cra_driver_name        = "xts-aes-aesni",
-       .cra_priority           = 400,
-       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-       .cra_blocksize          = AES_BLOCK_SIZE,
-       .cra_ctxsize            = sizeof(struct async_helper_ctx),
-       .cra_alignmask          = 0,
-       .cra_type               = &crypto_ablkcipher_type,
-       .cra_module             = THIS_MODULE,
-       .cra_init               = ablk_init,
-       .cra_exit               = ablk_exit,
-       .cra_u = {
-               .ablkcipher = {
-                       .min_keysize    = 2 * AES_MIN_KEY_SIZE,
-                       .max_keysize    = 2 * AES_MAX_KEY_SIZE,
-                       .ivsize         = AES_BLOCK_SIZE,
-                       .setkey         = ablk_set_key,
-                       .encrypt        = ablk_encrypt,
-                       .decrypt        = ablk_decrypt,
-               },
-       },
-} };
+};
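aesni_simd_skciphers2[] feeds the second registration loop in aesni_init() further down; for the single PCBC entry it currently carries, that loop effectively does the following (helper name illustrative, strings taken from the table above):

/* Annotated restatement of the PCBC case of the registration loop. */
static struct simd_skcipher_alg *demo_register_pcbc(void)
{
        /*
         * Builds the stack inside out:
         *   __aes-aesni            - internal AES-NI single-block cipher
         *   pcbc(__aes-aesni)      - generic PCBC chaining over that cipher
         *   fpu(pcbc(__aes-aesni)) - the "fpu" template, one kernel_fpu_begin/end
         *                            section around each request
         *   pcbc(aes)              - the SIMD wrapper that user code allocates
         */
        return simd_skcipher_create_compat("pcbc(aes)", "pcbc-aes-aesni",
                                           "fpu(pcbc(__aes-aesni))");
}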
 
 #ifdef CONFIG_X86_64
 static struct aead_alg aesni_aead_algs[] = { {
@@ -1401,9 +1076,27 @@ static const struct x86_cpu_id aesni_cpu_id[] = {
 };
 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
 
+static void aesni_free_simds(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
+                   aesni_simd_skciphers[i]; i++)
+               simd_skcipher_free(aesni_simd_skciphers[i]);
+
+       for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) &&
+                   aesni_simd_skciphers2[i].simd; i++)
+               simd_skcipher_free(aesni_simd_skciphers2[i].simd);
+}
+
 static int __init aesni_init(void)
 {
+       struct simd_skcipher_alg *simd;
+       const char *basename;
+       const char *algname;
+       const char *drvname;
        int err;
+       int i;
 
        if (!x86_match_cpu(aesni_cpu_id))
                return -ENODEV;
@@ -1445,13 +1138,48 @@ static int __init aesni_init(void)
        if (err)
                goto fpu_exit;
 
+       err = crypto_register_skciphers(aesni_skciphers,
+                                       ARRAY_SIZE(aesni_skciphers));
+       if (err)
+               goto unregister_algs;
+
        err = crypto_register_aeads(aesni_aead_algs,
                                    ARRAY_SIZE(aesni_aead_algs));
        if (err)
-               goto unregister_algs;
+               goto unregister_skciphers;
+
+       for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) {
+               algname = aesni_skciphers[i].base.cra_name + 2;
+               drvname = aesni_skciphers[i].base.cra_driver_name + 2;
+               basename = aesni_skciphers[i].base.cra_driver_name;
+               simd = simd_skcipher_create_compat(algname, drvname, basename);
+               err = PTR_ERR(simd);
+               if (IS_ERR(simd))
+                       goto unregister_simds;
+
+               aesni_simd_skciphers[i] = simd;
+       }
 
-       return err;
+       for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
+               algname = aesni_simd_skciphers2[i].algname;
+               drvname = aesni_simd_skciphers2[i].drvname;
+               basename = aesni_simd_skciphers2[i].basename;
+               simd = simd_skcipher_create_compat(algname, drvname, basename);
+               err = PTR_ERR(simd);
+               if (IS_ERR(simd))
+                       goto unregister_simds;
 
+               aesni_simd_skciphers2[i].simd = simd;
+       }
+
+       return 0;
+
+unregister_simds:
+       aesni_free_simds();
+       crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+unregister_skciphers:
+       crypto_unregister_skciphers(aesni_skciphers,
+                                   ARRAY_SIZE(aesni_skciphers));
 unregister_algs:
        crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 fpu_exit:
@@ -1461,7 +1189,10 @@ fpu_exit:
 
 static void __exit aesni_exit(void)
 {
+       aesni_free_simds();
        crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+       crypto_unregister_skciphers(aesni_skciphers,
+                                   ARRAY_SIZE(aesni_skciphers));
        crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 
        crypto_fpu_exit();
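With the conversion above, ECB/CBC/CTR/XTS are consumed through the skcipher request API instead of the removed (a)blkcipher entry points. A hypothetical caller using the completion idiom of this era (the demo_* names are illustrative; the crypto_wait_req() helper arrived only in later kernels):

#include <crypto/aes.h>
#include <crypto/skcipher.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/scatterlist.h>

struct demo_wait {
        struct completion done;
        int err;
};

static void demo_complete(struct crypto_async_request *req, int err)
{
        struct demo_wait *w = req->data;

        if (err == -EINPROGRESS)
                return;         /* backlog notification, final result follows */
        w->err = err;
        complete(&w->done);
}

/* len must be a multiple of AES_BLOCK_SIZE for CBC; buf is encrypted in place */
static int demo_cbc_encrypt(void *buf, unsigned int len,
                            const u8 *key, unsigned int keylen,
                            u8 iv[AES_BLOCK_SIZE])
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        struct scatterlist sg;
        struct demo_wait w;
        int err;

        tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_tfm;
        }

        init_completion(&w.done);
        sg_init_one(&sg, buf, len);
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                      demo_complete, &w);
        skcipher_request_set_crypt(req, &sg, &sg, len, iv);

        err = crypto_skcipher_encrypt(req);
        if (err == -EINPROGRESS || err == -EBUSY) {
                wait_for_completion(&w.done);
                err = w.err;
        }

        skcipher_request_free(req);
out_tfm:
        crypto_free_skcipher(tfm);
        return err;
}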
index e7d679e..4066804 100644
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 
 struct crypto_fpu_ctx {
-       struct crypto_blkcipher *child;
+       struct crypto_skcipher *child;
 };
 
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
                             unsigned int keylen)
 {
-       struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
-       struct crypto_blkcipher *child = ctx->child;
+       struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
+       struct crypto_skcipher *child = ctx->child;
        int err;
 
-       crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-       crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
-                                  CRYPTO_TFM_REQ_MASK);
-       err = crypto_blkcipher_setkey(child, key, keylen);
-       crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
-                                    CRYPTO_TFM_RES_MASK);
+       crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+                                        CRYPTO_TFM_REQ_MASK);
+       err = crypto_skcipher_setkey(child, key, keylen);
+       crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+                                         CRYPTO_TFM_RES_MASK);
        return err;
 }
 
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
-                             struct scatterlist *dst, struct scatterlist *src,
-                             unsigned int nbytes)
+static int crypto_fpu_encrypt(struct skcipher_request *req)
 {
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *child = ctx->child;
+       SKCIPHER_REQUEST_ON_STACK(subreq, child);
        int err;
-       struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-       struct crypto_blkcipher *child = ctx->child;
-       struct blkcipher_desc desc = {
-               .tfm = child,
-               .info = desc_in->info,
-               .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-       };
+
+       skcipher_request_set_tfm(subreq, child);
+       skcipher_request_set_callback(subreq, 0, NULL, NULL);
+       skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+                                  req->iv);
 
        kernel_fpu_begin();
-       err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
+       err = crypto_skcipher_encrypt(subreq);
        kernel_fpu_end();
+
+       skcipher_request_zero(subreq);
        return err;
 }
 
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
-                             struct scatterlist *dst, struct scatterlist *src,
-                             unsigned int nbytes)
+static int crypto_fpu_decrypt(struct skcipher_request *req)
 {
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *child = ctx->child;
+       SKCIPHER_REQUEST_ON_STACK(subreq, child);
        int err;
-       struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-       struct crypto_blkcipher *child = ctx->child;
-       struct blkcipher_desc desc = {
-               .tfm = child,
-               .info = desc_in->info,
-               .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-       };
+
+       skcipher_request_set_tfm(subreq, child);
+       skcipher_request_set_callback(subreq, 0, NULL, NULL);
+       skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+                                  req->iv);
 
        kernel_fpu_begin();
-       err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
+       err = crypto_skcipher_decrypt(subreq);
        kernel_fpu_end();
+
+       skcipher_request_zero(subreq);
        return err;
 }
 
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-       struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-       struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-       struct crypto_blkcipher *cipher;
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher_spawn *spawn;
+       struct crypto_skcipher *cipher;
 
-       cipher = crypto_spawn_blkcipher(spawn);
+       spawn = skcipher_instance_ctx(inst);
+       cipher = crypto_spawn_skcipher(spawn);
        if (IS_ERR(cipher))
                return PTR_ERR(cipher);
 
        ctx->child = cipher;
+
        return 0;
 }
 
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
+{
+       struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       crypto_free_skcipher(ctx->child);
+}
+
+static void crypto_fpu_free(struct skcipher_instance *inst)
 {
-       struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-       crypto_free_blkcipher(ctx->child);
+       crypto_drop_skcipher(skcipher_instance_ctx(inst));
+       kfree(inst);
 }
 
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_instance *inst;
-       struct crypto_alg *alg;
+       struct crypto_skcipher_spawn *spawn;
+       struct skcipher_instance *inst;
+       struct crypto_attr_type *algt;
+       struct skcipher_alg *alg;
+       const char *cipher_name;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+       algt = crypto_get_attr_type(tb);
+       if (IS_ERR(algt))
+               return PTR_ERR(algt);
+
+       if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
+           algt->mask)
+               return -EINVAL;
+
+       if (!(algt->mask & CRYPTO_ALG_INTERNAL))
+               return -EINVAL;
+
+       cipher_name = crypto_attr_alg_name(tb[1]);
+       if (IS_ERR(cipher_name))
+               return PTR_ERR(cipher_name);
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+       if (!inst)
+               return -ENOMEM;
+
+       spawn = skcipher_instance_ctx(inst);
+
+       crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+       err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
+                                  CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
        if (err)
-               return ERR_PTR(err);
-
-       alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
-                                 CRYPTO_ALG_TYPE_MASK);
-       if (IS_ERR(alg))
-               return ERR_CAST(alg);
-
-       inst = crypto_alloc_instance("fpu", alg);
-       if (IS_ERR(inst))
-               goto out_put_alg;
-
-       inst->alg.cra_flags = alg->cra_flags;
-       inst->alg.cra_priority = alg->cra_priority;
-       inst->alg.cra_blocksize = alg->cra_blocksize;
-       inst->alg.cra_alignmask = alg->cra_alignmask;
-       inst->alg.cra_type = alg->cra_type;
-       inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
-       inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
-       inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
-       inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-       inst->alg.cra_init = crypto_fpu_init_tfm;
-       inst->alg.cra_exit = crypto_fpu_exit_tfm;
-       inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
-       inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
-       inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
-       crypto_mod_put(alg);
-       return inst;
-}
+               goto out_free_inst;
 
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
-       crypto_drop_spawn(crypto_instance_ctx(inst));
+       alg = crypto_skcipher_spawn_alg(spawn);
+
+       err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
+                                 &alg->base);
+       if (err)
+               goto out_drop_skcipher;
+
+       inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
+       inst->alg.base.cra_priority = alg->base.cra_priority;
+       inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+       inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+
+       inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+       inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+       inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+       inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
+
+       inst->alg.init = crypto_fpu_init_tfm;
+       inst->alg.exit = crypto_fpu_exit_tfm;
+
+       inst->alg.setkey = crypto_fpu_setkey;
+       inst->alg.encrypt = crypto_fpu_encrypt;
+       inst->alg.decrypt = crypto_fpu_decrypt;
+
+       inst->free = crypto_fpu_free;
+
+       err = skcipher_register_instance(tmpl, inst);
+       if (err)
+               goto out_drop_skcipher;
+
+out:
+       return err;
+
+out_drop_skcipher:
+       crypto_drop_skcipher(spawn);
+out_free_inst:
        kfree(inst);
+       goto out;
 }
 
 static struct crypto_template crypto_fpu_tmpl = {
        .name = "fpu",
-       .alloc = crypto_fpu_alloc,
-       .free = crypto_fpu_free,
+       .create = crypto_fpu_create,
        .module = THIS_MODULE,
 };
 
index 6a85598..260a060 100644
 
 #include <linux/module.h>
 #include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
 
 static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
                                   struct blkcipher_desc *desc,
@@ -339,6 +339,41 @@ done:
        return nbytes;
 }
 
+static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+                                         void *ctx,
+                                         struct skcipher_walk *walk)
+{
+       const unsigned int bsize = 128 / 8;
+       unsigned int nbytes = walk->nbytes;
+       u128 *src = walk->src.virt.addr;
+       u128 *dst = walk->dst.virt.addr;
+       unsigned int num_blocks, func_bytes;
+       unsigned int i;
+
+       /* Process multi-block batch */
+       for (i = 0; i < gctx->num_funcs; i++) {
+               num_blocks = gctx->funcs[i].num_blocks;
+               func_bytes = bsize * num_blocks;
+
+               if (nbytes >= func_bytes) {
+                       do {
+                               gctx->funcs[i].fn_u.xts(ctx, dst, src,
+                                                       walk->iv);
+
+                               src += num_blocks;
+                               dst += num_blocks;
+                               nbytes -= func_bytes;
+                       } while (nbytes >= func_bytes);
+
+                       if (nbytes < bsize)
+                               goto done;
+               }
+       }
+
+done:
+       return nbytes;
+}
+
 /* for implementations implementing faster XTS IV generator */
 int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
                          struct blkcipher_desc *desc, struct scatterlist *dst,
@@ -379,6 +414,43 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 }
 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
 
+int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+                       struct skcipher_request *req,
+                       common_glue_func_t tweak_fn, void *tweak_ctx,
+                       void *crypt_ctx)
+{
+       const unsigned int bsize = 128 / 8;
+       struct skcipher_walk walk;
+       bool fpu_enabled = false;
+       unsigned int nbytes;
+       int err;
+
+       err = skcipher_walk_virt(&walk, req, false);
+       nbytes = walk.nbytes;
+       if (!nbytes)
+               return err;
+
+       /* set minimum length to bsize, for tweak_fn */
+       fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
+                                           &walk, fpu_enabled,
+                                           nbytes < bsize ? bsize : nbytes);
+
+       /* calculate first value of T */
+       tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+       while (nbytes) {
+               nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+
+               err = skcipher_walk_done(&walk, nbytes);
+               nbytes = walk.nbytes;
+       }
+
+       glue_fpu_end(fpu_enabled);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+
 void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
                               common_glue_func_t fn)
 {
index 9e5b671..acf9fdf 100644 (file)
@@ -114,7 +114,7 @@ static inline void sha1_init_digest(uint32_t *digest)
 }
 
 static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
-                        uint32_t total_len)
+                        uint64_t total_len)
 {
        uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1);
 
index 98a35bc..13590cc 100644 (file)
@@ -125,7 +125,7 @@ struct sha1_hash_ctx {
        /* error flag */
        int error;
 
-       uint32_t        total_length;
+       uint64_t        total_length;
        const void      *incoming_buffer;
        uint32_t        incoming_buffer_length;
        uint8_t         partial_block_buffer[SHA1_BLOCK_SIZE * 2];
index 6f97fb3..7926a22 100644 (file)
@@ -115,7 +115,7 @@ inline void sha256_init_digest(uint32_t *digest)
 }
 
 inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
-                        uint32_t total_len)
+                        uint64_t total_len)
 {
        uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1);
 
index edd252b..aabb303 100644 (file)
@@ -125,7 +125,7 @@ struct sha256_hash_ctx {
        /* error flag */
        int error;
 
-       uint32_t        total_length;
+       uint64_t        total_length;
        const void      *incoming_buffer;
        uint32_t        incoming_buffer_length;
        uint8_t         partial_block_buffer[SHA256_BLOCK_SIZE * 2];
index d210174..9c1bb6d 100644 (file)
@@ -117,7 +117,7 @@ inline void sha512_init_digest(uint64_t *digest)
 }
 
 inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
-                        uint32_t total_len)
+                        uint64_t total_len)
 {
        uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
 
index 9d4b2c8..e4653f5 100644 (file)
@@ -119,7 +119,7 @@ struct sha512_hash_ctx {
        /* error flag */
        int error;
 
-       uint32_t        total_length;
+       uint64_t        total_length;
        const void      *incoming_buffer;
        uint32_t        incoming_buffer_length;
        uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
index 03bb106..29e53ea 100644 (file)
@@ -5,8 +5,8 @@
 #ifndef _CRYPTO_GLUE_HELPER_H
 #define _CRYPTO_GLUE_HELPER_H
 
+#include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
@@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
        return true;
 }
 
+static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
+                                        int fpu_blocks_limit,
+                                        struct skcipher_walk *walk,
+                                        bool fpu_enabled, unsigned int nbytes)
+{
+       if (likely(fpu_blocks_limit < 0))
+               return false;
+
+       if (fpu_enabled)
+               return true;
+
+       /*
+        * Vector registers are only used when the chunk to be processed is
+        * large enough, so do not enable the FPU until it is necessary.
+        */
+       if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+               return false;
+
+       /* prevent sleeping if FPU is in use */
+       skcipher_walk_atomise(walk);
+
+       kernel_fpu_begin();
+       return true;
+}
+
 static inline void glue_fpu_end(bool fpu_enabled)
 {
        if (fpu_enabled)
@@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
                                 common_glue_func_t tweak_fn, void *tweak_ctx,
                                 void *crypt_ctx);
 
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+                                struct blkcipher_desc *desc,
+                                struct scatterlist *dst,
+                                struct scatterlist *src, unsigned int nbytes,
+                                common_glue_func_t tweak_fn, void *tweak_ctx,
+                                void *crypt_ctx);
+
+extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+                              struct skcipher_request *req,
+                              common_glue_func_t tweak_fn, void *tweak_ctx,
+                              void *crypt_ctx);
+
 extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
                                      le128 *iv, common_glue_func_t fn);
 
index 98e387e..bc26dc9 100644 (file)
 #include <linux/module.h>
 #include <linux/crypto.h>
 #include <linux/sw842.h>
+#include <crypto/internal/scompress.h>
 
 struct crypto842_ctx {
-       char wmem[SW842_MEM_COMPRESS];  /* working memory for compress */
+       void *wmem;     /* working memory for compress */
 };
 
+static void *crypto842_alloc_ctx(struct crypto_scomp *tfm)
+{
+       void *ctx;
+
+       ctx = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL);
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       return ctx;
+}
+
+static int crypto842_init(struct crypto_tfm *tfm)
+{
+       struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       ctx->wmem = crypto842_alloc_ctx(NULL);
+       if (IS_ERR(ctx->wmem))
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void crypto842_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       kfree(ctx);
+}
+
+static void crypto842_exit(struct crypto_tfm *tfm)
+{
+       struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       crypto842_free_ctx(NULL, ctx->wmem);
+}
+
 static int crypto842_compress(struct crypto_tfm *tfm,
                              const u8 *src, unsigned int slen,
                              u8 *dst, unsigned int *dlen)
@@ -45,6 +80,13 @@ static int crypto842_compress(struct crypto_tfm *tfm,
        return sw842_compress(src, slen, dst, dlen, ctx->wmem);
 }
 
+static int crypto842_scompress(struct crypto_scomp *tfm,
+                              const u8 *src, unsigned int slen,
+                              u8 *dst, unsigned int *dlen, void *ctx)
+{
+       return sw842_compress(src, slen, dst, dlen, ctx);
+}
+
 static int crypto842_decompress(struct crypto_tfm *tfm,
                                const u8 *src, unsigned int slen,
                                u8 *dst, unsigned int *dlen)
@@ -52,6 +94,13 @@ static int crypto842_decompress(struct crypto_tfm *tfm,
        return sw842_decompress(src, slen, dst, dlen);
 }
 
+static int crypto842_sdecompress(struct crypto_scomp *tfm,
+                                const u8 *src, unsigned int slen,
+                                u8 *dst, unsigned int *dlen, void *ctx)
+{
+       return sw842_decompress(src, slen, dst, dlen);
+}
+
 static struct crypto_alg alg = {
        .cra_name               = "842",
        .cra_driver_name        = "842-generic",
@@ -59,20 +108,48 @@ static struct crypto_alg alg = {
        .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
        .cra_ctxsize            = sizeof(struct crypto842_ctx),
        .cra_module             = THIS_MODULE,
+       .cra_init               = crypto842_init,
+       .cra_exit               = crypto842_exit,
        .cra_u                  = { .compress = {
        .coa_compress           = crypto842_compress,
        .coa_decompress         = crypto842_decompress } }
 };
 
+static struct scomp_alg scomp = {
+       .alloc_ctx              = crypto842_alloc_ctx,
+       .free_ctx               = crypto842_free_ctx,
+       .compress               = crypto842_scompress,
+       .decompress             = crypto842_sdecompress,
+       .base                   = {
+               .cra_name       = "842",
+               .cra_driver_name = "842-scomp",
+               .cra_priority    = 100,
+               .cra_module      = THIS_MODULE,
+       }
+};
+
 static int __init crypto842_mod_init(void)
 {
-       return crypto_register_alg(&alg);
+       int ret;
+
+       ret = crypto_register_alg(&alg);
+       if (ret)
+               return ret;
+
+       ret = crypto_register_scomp(&scomp);
+       if (ret) {
+               crypto_unregister_alg(&alg);
+               return ret;
+       }
+
+       return ret;
 }
 module_init(crypto842_mod_init);
 
 static void __exit crypto842_mod_exit(void)
 {
        crypto_unregister_alg(&alg);
+       crypto_unregister_scomp(&scomp);
 }
 module_exit(crypto842_mod_exit);
 
index 84d7148..160f08e 100644 (file)
@@ -24,7 +24,7 @@ comment "Crypto core or helper"
 config CRYPTO_FIPS
        bool "FIPS 200 compliance"
        depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS
-       depends on MODULE_SIG
+       depends on (MODULE_SIG || !MODULES)
        help
          This option enables the fips boot option which is
          required if you want the system to operate in a FIPS 200
@@ -102,6 +102,15 @@ config CRYPTO_KPP
        select CRYPTO_ALGAPI
        select CRYPTO_KPP2
 
+config CRYPTO_ACOMP2
+       tristate
+       select CRYPTO_ALGAPI2
+
+config CRYPTO_ACOMP
+       tristate
+       select CRYPTO_ALGAPI
+       select CRYPTO_ACOMP2
+
 config CRYPTO_RSA
        tristate "RSA algorithm"
        select CRYPTO_AKCIPHER
@@ -138,6 +147,7 @@ config CRYPTO_MANAGER2
        select CRYPTO_BLKCIPHER2
        select CRYPTO_AKCIPHER2
        select CRYPTO_KPP2
+       select CRYPTO_ACOMP2
 
 config CRYPTO_USER
        tristate "Userspace cryptographic algorithm configuration"
@@ -236,10 +246,14 @@ config CRYPTO_ABLK_HELPER
        tristate
        select CRYPTO_CRYPTD
 
+config CRYPTO_SIMD
+       tristate
+       select CRYPTO_CRYPTD
+
 config CRYPTO_GLUE_HELPER_X86
        tristate
        depends on X86
-       select CRYPTO_ALGAPI
+       select CRYPTO_BLKCIPHER
 
 config CRYPTO_ENGINE
        tristate
@@ -437,7 +451,7 @@ config CRYPTO_CRC32C_INTEL
          gain performance compared with software implementation.
          Module will be crc32c-intel.
 
-config CRYPT_CRC32C_VPMSUM
+config CRYPTO_CRC32C_VPMSUM
        tristate "CRC32c CRC algorithm (powerpc64)"
        depends on PPC64 && ALTIVEC
        select CRYPTO_HASH
@@ -928,14 +942,13 @@ config CRYPTO_AES_X86_64
 config CRYPTO_AES_NI_INTEL
        tristate "AES cipher algorithms (AES-NI)"
        depends on X86
+       select CRYPTO_AEAD
        select CRYPTO_AES_X86_64 if 64BIT
        select CRYPTO_AES_586 if !64BIT
-       select CRYPTO_CRYPTD
-       select CRYPTO_ABLK_HELPER
        select CRYPTO_ALGAPI
+       select CRYPTO_BLKCIPHER
        select CRYPTO_GLUE_HELPER_X86 if 64BIT
-       select CRYPTO_LRW
-       select CRYPTO_XTS
+       select CRYPTO_SIMD
        help
          Use Intel AES-NI instructions for AES algorithm.
 
@@ -1568,6 +1581,7 @@ comment "Compression"
 config CRYPTO_DEFLATE
        tristate "Deflate compression algorithm"
        select CRYPTO_ALGAPI
+       select CRYPTO_ACOMP2
        select ZLIB_INFLATE
        select ZLIB_DEFLATE
        help
@@ -1579,6 +1593,7 @@ config CRYPTO_DEFLATE
 config CRYPTO_LZO
        tristate "LZO compression algorithm"
        select CRYPTO_ALGAPI
+       select CRYPTO_ACOMP2
        select LZO_COMPRESS
        select LZO_DECOMPRESS
        help
@@ -1587,6 +1602,7 @@ config CRYPTO_LZO
 config CRYPTO_842
        tristate "842 compression algorithm"
        select CRYPTO_ALGAPI
+       select CRYPTO_ACOMP2
        select 842_COMPRESS
        select 842_DECOMPRESS
        help
@@ -1595,6 +1611,7 @@ config CRYPTO_842
 config CRYPTO_LZ4
        tristate "LZ4 compression algorithm"
        select CRYPTO_ALGAPI
+       select CRYPTO_ACOMP2
        select LZ4_COMPRESS
        select LZ4_DECOMPRESS
        help
@@ -1603,6 +1620,7 @@ config CRYPTO_LZ4
 config CRYPTO_LZ4HC
        tristate "LZ4HC compression algorithm"
        select CRYPTO_ALGAPI
+       select CRYPTO_ACOMP2
        select LZ4HC_COMPRESS
        select LZ4_DECOMPRESS
        help
index bd6a029..b8f0e3e 100644 (file)
@@ -51,6 +51,10 @@ rsa_generic-y += rsa_helper.o
 rsa_generic-y += rsa-pkcs1pad.o
 obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o
 
+crypto_acompress-y := acompress.o
+crypto_acompress-y += scompress.o
+obj-$(CONFIG_CRYPTO_ACOMP2) += crypto_acompress.o
+
 cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
@@ -139,3 +143,5 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
 obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
 obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
+crypto_simd-y := simd.o
+obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o
diff --git a/crypto/acompress.c b/crypto/acompress.c
new file mode 100644 (file)
index 0000000..887783d
--- /dev/null
@@ -0,0 +1,169 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <crypto/internal/acompress.h>
+#include <crypto/internal/scompress.h>
+#include "internal.h"
+
+static const struct crypto_type crypto_acomp_type;
+
+#ifdef CONFIG_NET
+static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+       struct crypto_report_acomp racomp;
+
+       strncpy(racomp.type, "acomp", sizeof(racomp.type));
+
+       if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+                   sizeof(struct crypto_report_acomp), &racomp))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+#else
+static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+       return -ENOSYS;
+}
+#endif
+
+static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
+       __attribute__ ((unused));
+
+static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg)
+{
+       seq_puts(m, "type         : acomp\n");
+}
+
+static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm)
+{
+       struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
+       struct acomp_alg *alg = crypto_acomp_alg(acomp);
+
+       alg->exit(acomp);
+}
+
+static int crypto_acomp_init_tfm(struct crypto_tfm *tfm)
+{
+       struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm);
+       struct acomp_alg *alg = crypto_acomp_alg(acomp);
+
+       if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
+               return crypto_init_scomp_ops_async(tfm);
+
+       acomp->compress = alg->compress;
+       acomp->decompress = alg->decompress;
+       acomp->dst_free = alg->dst_free;
+       acomp->reqsize = alg->reqsize;
+
+       if (alg->exit)
+               acomp->base.exit = crypto_acomp_exit_tfm;
+
+       if (alg->init)
+               return alg->init(acomp);
+
+       return 0;
+}
+
+static unsigned int crypto_acomp_extsize(struct crypto_alg *alg)
+{
+       int extsize = crypto_alg_extsize(alg);
+
+       if (alg->cra_type != &crypto_acomp_type)
+               extsize += sizeof(struct crypto_scomp *);
+
+       return extsize;
+}
+
+static const struct crypto_type crypto_acomp_type = {
+       .extsize = crypto_acomp_extsize,
+       .init_tfm = crypto_acomp_init_tfm,
+#ifdef CONFIG_PROC_FS
+       .show = crypto_acomp_show,
+#endif
+       .report = crypto_acomp_report,
+       .maskclear = ~CRYPTO_ALG_TYPE_MASK,
+       .maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK,
+       .type = CRYPTO_ALG_TYPE_ACOMPRESS,
+       .tfmsize = offsetof(struct crypto_acomp, base),
+};
+
+struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
+                                       u32 mask)
+{
+       return crypto_alloc_tfm(alg_name, &crypto_acomp_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_acomp);
+
+struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
+{
+       struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+       struct acomp_req *req;
+
+       req = __acomp_request_alloc(acomp);
+       if (req && (tfm->__crt_alg->cra_type != &crypto_acomp_type))
+               return crypto_acomp_scomp_alloc_ctx(req);
+
+       return req;
+}
+EXPORT_SYMBOL_GPL(acomp_request_alloc);
+
+void acomp_request_free(struct acomp_req *req)
+{
+       struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+       struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+
+       if (tfm->__crt_alg->cra_type != &crypto_acomp_type)
+               crypto_acomp_scomp_free_ctx(req);
+
+       if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) {
+               acomp->dst_free(req->dst);
+               req->dst = NULL;
+       }
+
+       __acomp_request_free(req);
+}
+EXPORT_SYMBOL_GPL(acomp_request_free);
+
+int crypto_register_acomp(struct acomp_alg *alg)
+{
+       struct crypto_alg *base = &alg->base;
+
+       base->cra_type = &crypto_acomp_type;
+       base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+       base->cra_flags |= CRYPTO_ALG_TYPE_ACOMPRESS;
+
+       return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_acomp);
+
+int crypto_unregister_acomp(struct acomp_alg *alg)
+{
+       return crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_acomp);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Asynchronous compression type");
index 6e39d9c..ccb85e1 100644 (file)
@@ -247,12 +247,8 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
        memcpy(param->alg, alg->cra_name, sizeof(param->alg));
        type = alg->cra_flags;
 
-       /* This piece of crap needs to disappear into per-type test hooks. */
-       if (!((type ^ CRYPTO_ALG_TYPE_BLKCIPHER) &
-             CRYPTO_ALG_TYPE_BLKCIPHER_MASK) && !(type & CRYPTO_ALG_GENIV) &&
-           ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
-            CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize :
-                                        alg->cra_ablkcipher.ivsize))
+       /* Do not test internal algorithms. */
+       if (type & CRYPTO_ALG_INTERNAL)
                type |= CRYPTO_ALG_TESTED;
 
        param->type = type;
index 235f54d..668ef40 100644 (file)
@@ -454,12 +454,13 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg,
        used -= ctx->aead_assoclen;
 
        /* take over all tx sgls from ctx */
-       areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur,
+       areq->tsgl = sock_kmalloc(sk,
+                                 sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1),
                                  GFP_KERNEL);
        if (unlikely(!areq->tsgl))
                goto free;
 
-       sg_init_table(areq->tsgl, sgl->cur);
+       sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1));
        for (i = 0; i < sgl->cur; i++)
                sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]),
                            sgl->sg[i].length, sgl->sg[i].offset);
index 1e38aaa..a9e79d8 100644 (file)
@@ -566,8 +566,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg,
                         * need to expand */
                        tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
                                      GFP_KERNEL);
-                       if (!tmp)
+                       if (!tmp) {
+                               err = -ENOMEM;
                                goto free;
+                       }
 
                        sg_init_table(tmp, tx_nents * 2);
                        for (x = 0; x < tx_nents; x++)
index bbc147c..b16ce16 100644 (file)
@@ -211,8 +211,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
        if (!name)
                return ERR_PTR(-ENOENT);
 
+       type &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
        mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
-       type &= mask;
 
        alg = crypto_alg_lookup(name, type, mask);
        if (!alg) {
@@ -310,24 +310,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 {
        const struct crypto_type *type = tfm->__crt_alg->cra_type;
 
-       if (type) {
-               if (tfm->exit)
-                       tfm->exit(tfm);
-               return;
-       }
-
-       switch (crypto_tfm_alg_type(tfm)) {
-       case CRYPTO_ALG_TYPE_CIPHER:
-               crypto_exit_cipher_ops(tfm);
-               break;
-
-       case CRYPTO_ALG_TYPE_COMPRESS:
-               crypto_exit_compress_ops(tfm);
-               break;
-
-       default:
-               BUG();
-       }
+       if (type && tfm->exit)
+               tfm->exit(tfm);
 }
 
 static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
index a7e1ac7..875470b 100644 (file)
@@ -324,7 +324,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
        if (IS_ERR(auth))
                return PTR_ERR(auth);
 
-       enc = crypto_spawn_skcipher2(&ictx->enc);
+       enc = crypto_spawn_skcipher(&ictx->enc);
        err = PTR_ERR(enc);
        if (IS_ERR(enc))
                goto err_free_ahash;
@@ -420,9 +420,9 @@ static int crypto_authenc_create(struct crypto_template *tmpl,
                goto err_free_inst;
 
        crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst));
-       err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_drop_auth;
 
index 121010a..6f8f6b8 100644 (file)
@@ -342,7 +342,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
        if (IS_ERR(auth))
                return PTR_ERR(auth);
 
-       enc = crypto_spawn_skcipher2(&ictx->enc);
+       enc = crypto_spawn_skcipher(&ictx->enc);
        err = PTR_ERR(enc);
        if (IS_ERR(enc))
                goto err_free_ahash;
@@ -441,9 +441,9 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl,
                goto err_free_inst;
 
        crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst));
-       err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(&ctx->enc, enc_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_drop_auth;
 
index 780ee27..68f751a 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * CBC: Cipher Block Chaining mode
  *
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2006-2016 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/cbc.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/scatterlist.h>
 #include <linux/slab.h>
 
 struct crypto_cbc_ctx {
        struct crypto_cipher *child;
 };
 
-static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_cbc_setkey(struct crypto_skcipher *parent, const u8 *key,
                             unsigned int keylen)
 {
-       struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent);
+       struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(parent);
        struct crypto_cipher *child = ctx->child;
        int err;
 
        crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-       crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+       crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
                                       CRYPTO_TFM_REQ_MASK);
        err = crypto_cipher_setkey(child, key, keylen);
-       crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-                                    CRYPTO_TFM_RES_MASK);
+       crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+                                         CRYPTO_TFM_RES_MASK);
        return err;
 }
 
-static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
-                                     struct blkcipher_walk *walk,
-                                     struct crypto_cipher *tfm)
+static inline void crypto_cbc_encrypt_one(struct crypto_skcipher *tfm,
+                                         const u8 *src, u8 *dst)
 {
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_encrypt;
-       int bsize = crypto_cipher_blocksize(tfm);
-       unsigned int nbytes = walk->nbytes;
-       u8 *src = walk->src.virt.addr;
-       u8 *dst = walk->dst.virt.addr;
-       u8 *iv = walk->iv;
-
-       do {
-               crypto_xor(iv, src, bsize);
-               fn(crypto_cipher_tfm(tfm), dst, iv);
-               memcpy(iv, dst, bsize);
-
-               src += bsize;
-               dst += bsize;
-       } while ((nbytes -= bsize) >= bsize);
-
-       return nbytes;
-}
-
-static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc,
-                                     struct blkcipher_walk *walk,
-                                     struct crypto_cipher *tfm)
-{
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_encrypt;
-       int bsize = crypto_cipher_blocksize(tfm);
-       unsigned int nbytes = walk->nbytes;
-       u8 *src = walk->src.virt.addr;
-       u8 *iv = walk->iv;
-
-       do {
-               crypto_xor(src, iv, bsize);
-               fn(crypto_cipher_tfm(tfm), src, src);
-               iv = src;
-
-               src += bsize;
-       } while ((nbytes -= bsize) >= bsize);
+       struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-       memcpy(walk->iv, iv, bsize);
-
-       return nbytes;
+       crypto_cipher_encrypt_one(ctx->child, dst, src);
 }
 
-static int crypto_cbc_encrypt(struct blkcipher_desc *desc,
-                             struct scatterlist *dst, struct scatterlist *src,
-                             unsigned int nbytes)
+static int crypto_cbc_encrypt(struct skcipher_request *req)
 {
-       struct blkcipher_walk walk;
-       struct crypto_blkcipher *tfm = desc->tfm;
-       struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
-       struct crypto_cipher *child = ctx->child;
-       int err;
-
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
-
-       while ((nbytes = walk.nbytes)) {
-               if (walk.src.virt.addr == walk.dst.virt.addr)
-                       nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child);
-               else
-                       nbytes = crypto_cbc_encrypt_segment(desc, &walk, child);
-               err = blkcipher_walk_done(desc, &walk, nbytes);
-       }
-
-       return err;
+       return crypto_cbc_encrypt_walk(req, crypto_cbc_encrypt_one);
 }
 
-static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc,
-                                     struct blkcipher_walk *walk,
-                                     struct crypto_cipher *tfm)
+static inline void crypto_cbc_decrypt_one(struct crypto_skcipher *tfm,
+                                         const u8 *src, u8 *dst)
 {
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_decrypt;
-       int bsize = crypto_cipher_blocksize(tfm);
-       unsigned int nbytes = walk->nbytes;
-       u8 *src = walk->src.virt.addr;
-       u8 *dst = walk->dst.virt.addr;
-       u8 *iv = walk->iv;
-
-       do {
-               fn(crypto_cipher_tfm(tfm), dst, src);
-               crypto_xor(dst, iv, bsize);
-               iv = src;
-
-               src += bsize;
-               dst += bsize;
-       } while ((nbytes -= bsize) >= bsize);
-
-       memcpy(walk->iv, iv, bsize);
-
-       return nbytes;
-}
+       struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
-                                     struct blkcipher_walk *walk,
-                                     struct crypto_cipher *tfm)
-{
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_decrypt;
-       int bsize = crypto_cipher_blocksize(tfm);
-       unsigned int nbytes = walk->nbytes;
-       u8 *src = walk->src.virt.addr;
-       u8 last_iv[bsize];
-
-       /* Start of the last block. */
-       src += nbytes - (nbytes & (bsize - 1)) - bsize;
-       memcpy(last_iv, src, bsize);
-
-       for (;;) {
-               fn(crypto_cipher_tfm(tfm), src, src);
-               if ((nbytes -= bsize) < bsize)
-                       break;
-               crypto_xor(src, src - bsize, bsize);
-               src -= bsize;
-       }
-
-       crypto_xor(src, walk->iv, bsize);
-       memcpy(walk->iv, last_iv, bsize);
-
-       return nbytes;
+       crypto_cipher_decrypt_one(ctx->child, dst, src);
 }
 
-static int crypto_cbc_decrypt(struct blkcipher_desc *desc,
-                             struct scatterlist *dst, struct scatterlist *src,
-                             unsigned int nbytes)
+static int crypto_cbc_decrypt(struct skcipher_request *req)
 {
-       struct blkcipher_walk walk;
-       struct crypto_blkcipher *tfm = desc->tfm;
-       struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
-       struct crypto_cipher *child = ctx->child;
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct skcipher_walk walk;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, false);
 
-       while ((nbytes = walk.nbytes)) {
-               if (walk.src.virt.addr == walk.dst.virt.addr)
-                       nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child);
-               else
-                       nbytes = crypto_cbc_decrypt_segment(desc, &walk, child);
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+       while (walk.nbytes) {
+               err = crypto_cbc_decrypt_blocks(&walk, tfm,
+                                               crypto_cbc_decrypt_one);
+               err = skcipher_walk_done(&walk, err);
        }
 
        return err;
 }
 
-static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
+static int crypto_cbc_init_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-       struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+       struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_cipher *cipher;
 
        cipher = crypto_spawn_cipher(spawn);
@@ -205,72 +92,94 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
        return 0;
 }
 
-static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_cbc_exit_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
        crypto_free_cipher(ctx->child);
 }
 
-static struct crypto_instance *crypto_cbc_alloc(struct rtattr **tb)
+static void crypto_cbc_free(struct skcipher_instance *inst)
+{
+       crypto_drop_skcipher(skcipher_instance_ctx(inst));
+       kfree(inst);
+}
+
+static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_instance *inst;
+       struct skcipher_instance *inst;
+       struct crypto_spawn *spawn;
        struct crypto_alg *alg;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
        if (err)
-               return ERR_PTR(err);
+               return err;
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+       if (!inst)
+               return -ENOMEM;
 
        alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
                                  CRYPTO_ALG_TYPE_MASK);
+       err = PTR_ERR(alg);
        if (IS_ERR(alg))
-               return ERR_CAST(alg);
+               goto err_free_inst;
 
-       inst = ERR_PTR(-EINVAL);
-       if (!is_power_of_2(alg->cra_blocksize))
-               goto out_put_alg;
+       spawn = skcipher_instance_ctx(inst);
+       err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+                               CRYPTO_ALG_TYPE_MASK);
+       crypto_mod_put(alg);
+       if (err)
+               goto err_free_inst;
 
-       inst = crypto_alloc_instance("cbc", alg);
-       if (IS_ERR(inst))
-               goto out_put_alg;
+       err = crypto_inst_setname(skcipher_crypto_instance(inst), "cbc", alg);
+       if (err)
+               goto err_drop_spawn;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-       inst->alg.cra_priority = alg->cra_priority;
-       inst->alg.cra_blocksize = alg->cra_blocksize;
-       inst->alg.cra_alignmask = alg->cra_alignmask;
-       inst->alg.cra_type = &crypto_blkcipher_type;
+       err = -EINVAL;
+       if (!is_power_of_2(alg->cra_blocksize))
+               goto err_drop_spawn;
+
+       inst->alg.base.cra_priority = alg->cra_priority;
+       inst->alg.base.cra_blocksize = alg->cra_blocksize;
+       inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
        /* We access the data as u32s when xoring. */
-       inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+       inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 
-       inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-       inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-       inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+       inst->alg.ivsize = alg->cra_blocksize;
+       inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+       inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 
-       inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
+       inst->alg.base.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
 
-       inst->alg.cra_init = crypto_cbc_init_tfm;
-       inst->alg.cra_exit = crypto_cbc_exit_tfm;
+       inst->alg.init = crypto_cbc_init_tfm;
+       inst->alg.exit = crypto_cbc_exit_tfm;
 
-       inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey;
-       inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt;
-       inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt;
+       inst->alg.setkey = crypto_cbc_setkey;
+       inst->alg.encrypt = crypto_cbc_encrypt;
+       inst->alg.decrypt = crypto_cbc_decrypt;
 
-out_put_alg:
-       crypto_mod_put(alg);
-       return inst;
-}
+       inst->free = crypto_cbc_free;
 
-static void crypto_cbc_free(struct crypto_instance *inst)
-{
-       crypto_drop_spawn(crypto_instance_ctx(inst));
+       err = skcipher_register_instance(tmpl, inst);
+       if (err)
+               goto err_drop_spawn;
+
+out:
+       return err;
+
+err_drop_spawn:
+       crypto_drop_spawn(spawn);
+err_free_inst:
        kfree(inst);
+       goto out;
 }
 
 static struct crypto_template crypto_cbc_tmpl = {
        .name = "cbc",
-       .alloc = crypto_cbc_alloc,
-       .free = crypto_cbc_free,
+       .create = crypto_cbc_create,
        .module = THIS_MODULE,
 };
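
With the template converted from the legacy blkcipher interface to skcipher, callers now reach it through the generic skcipher API. The fragment below is an editor's sketch only, not part of the patch, showing a one-shot in-place encryption with "cbc(aes)"; it assumes an AES cipher is available, that len is a multiple of the block size, and that buf is not stack memory, since it is mapped through a scatterlist.

/* Editor's sketch, not part of the patch. */
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <crypto/skcipher.h>

static int example_cbc_encrypt(u8 *buf, unsigned int len,
                               const u8 *key, unsigned int keylen, u8 *iv)
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        struct scatterlist sg;
        int err;

        tfm = crypto_alloc_skcipher("cbc(aes)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_tfm;
        }

        sg_init_one(&sg, buf, len);
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
        skcipher_request_set_crypt(req, &sg, &sg, len, iv);

        /* may return -EINPROGRESS for async providers; handling omitted */
        err = crypto_skcipher_encrypt(req);

        skcipher_request_free(req);
out_free_tfm:
        crypto_free_skcipher(tfm);
        return err;
}
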
 
index 006d857..26b924d 100644 (file)
@@ -462,7 +462,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
        if (IS_ERR(cipher))
                return PTR_ERR(cipher);
 
-       ctr = crypto_spawn_skcipher2(&ictx->ctr);
+       ctr = crypto_spawn_skcipher(&ictx->ctr);
        err = PTR_ERR(ctr);
        if (IS_ERR(ctr))
                goto err_free_cipher;
@@ -544,9 +544,9 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl,
                goto err_free_inst;
 
        crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst));
-       err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_drop_cipher;
 
index e899ef5..db1bc31 100644 (file)
@@ -532,7 +532,7 @@ static int chachapoly_init(struct crypto_aead *tfm)
        if (IS_ERR(poly))
                return PTR_ERR(poly);
 
-       chacha = crypto_spawn_skcipher2(&ictx->chacha);
+       chacha = crypto_spawn_skcipher(&ictx->chacha);
        if (IS_ERR(chacha)) {
                crypto_free_ahash(poly);
                return PTR_ERR(chacha);
@@ -625,9 +625,9 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
                goto err_free_inst;
 
        crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst));
-       err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_drop_poly;
 
index 39541e0..94fa355 100644 (file)
@@ -116,7 +116,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 
        return 0;
 }
-
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm)
-{
-}
index 7a8bfbd..04080dc 100644 (file)
@@ -57,7 +57,8 @@ static int crypto_cmac_digest_setkey(struct crypto_shash *parent,
        unsigned long alignmask = crypto_shash_alignmask(parent);
        struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
        unsigned int bs = crypto_shash_blocksize(parent);
-       __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
+       __be64 *consts = PTR_ALIGN((void *)ctx->ctx,
+                                  (alignmask | (__alignof__(__be64) - 1)) + 1);
        u64 _const[2];
        int i, err = 0;
        u8 msb_mask, gfmask;
@@ -173,7 +174,8 @@ static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out)
        struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
        struct crypto_cipher *tfm = tctx->child;
        int bs = crypto_shash_blocksize(parent);
-       u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1);
+       u8 *consts = PTR_ALIGN((void *)tctx->ctx,
+                              (alignmask | (__alignof__(__be64) - 1)) + 1);
        u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
        u8 *prev = odds + bs;
        unsigned int offset = 0;
@@ -243,6 +245,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        case 8:
                break;
        default:
+               err = -EINVAL;
                goto out_put_alg;
        }
 
@@ -257,7 +260,8 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
        if (err)
                goto out_free_inst;
 
-       alignmask = alg->cra_alignmask | (sizeof(long) - 1);
+       /* We access the data as u32s when xoring. */
+       alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
        inst->alg.base.cra_alignmask = alignmask;
        inst->alg.base.cra_priority = alg->cra_priority;
        inst->alg.base.cra_blocksize = alg->cra_blocksize;
@@ -269,7 +273,9 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
                + alg->cra_blocksize * 2;
 
        inst->alg.base.cra_ctxsize =
-               ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1)
+               ALIGN(sizeof(struct cmac_tfm_ctx), crypto_tfm_ctx_alignment())
+               + ((alignmask | (__alignof__(__be64) - 1)) &
+                  ~(crypto_tfm_ctx_alignment() - 1))
                + alg->cra_blocksize * 2;
 
        inst->alg.base.cra_init = cmac_init_tfm;
index c33f076..f2d5229 100644 (file)
@@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm)
 
        return 0;
 }
-
-void crypto_exit_compress_ops(struct crypto_tfm *tfm)
-{
-}
index 0c654e5..0508c48 100644 (file)
@@ -17,9 +17,9 @@
  *
  */
 
-#include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/cryptd.h>
 #include <crypto/crypto_wq.h>
 #include <linux/atomic.h>
@@ -48,6 +48,11 @@ struct cryptd_instance_ctx {
        struct cryptd_queue *queue;
 };
 
+struct skcipherd_instance_ctx {
+       struct crypto_skcipher_spawn spawn;
+       struct cryptd_queue *queue;
+};
+
 struct hashd_instance_ctx {
        struct crypto_shash_spawn spawn;
        struct cryptd_queue *queue;
@@ -67,6 +72,15 @@ struct cryptd_blkcipher_request_ctx {
        crypto_completion_t complete;
 };
 
+struct cryptd_skcipher_ctx {
+       atomic_t refcnt;
+       struct crypto_skcipher *child;
+};
+
+struct cryptd_skcipher_request_ctx {
+       crypto_completion_t complete;
+};
+
 struct cryptd_hash_ctx {
        atomic_t refcnt;
        struct crypto_shash *child;
@@ -122,7 +136,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
 {
        int cpu, err;
        struct cryptd_cpu_queue *cpu_queue;
-       struct crypto_tfm *tfm;
        atomic_t *refcnt;
        bool may_backlog;
 
@@ -141,7 +154,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue,
        if (!atomic_read(refcnt))
                goto out_put_cpu;
 
-       tfm = request->tfm;
        atomic_inc(refcnt);
 
 out_put_cpu:
@@ -432,6 +444,216 @@ out_put_alg:
        return err;
 }
 
+static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
+                                 const u8 *key, unsigned int keylen)
+{
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
+       struct crypto_skcipher *child = ctx->child;
+       int err;
+
+       crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+                                        CRYPTO_TFM_REQ_MASK);
+       err = crypto_skcipher_setkey(child, key, keylen);
+       crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+                                         CRYPTO_TFM_RES_MASK);
+       return err;
+}
+
+static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+       int refcnt = atomic_read(&ctx->refcnt);
+
+       local_bh_disable();
+       rctx->complete(&req->base, err);
+       local_bh_enable();
+
+       if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt))
+               crypto_free_skcipher(tfm);
+}
+
+static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
+                                   int err)
+{
+       struct skcipher_request *req = skcipher_request_cast(base);
+       struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *child = ctx->child;
+       SKCIPHER_REQUEST_ON_STACK(subreq, child);
+
+       if (unlikely(err == -EINPROGRESS))
+               goto out;
+
+       skcipher_request_set_tfm(subreq, child);
+       skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     NULL, NULL);
+       skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+                                  req->iv);
+
+       err = crypto_skcipher_encrypt(subreq);
+       skcipher_request_zero(subreq);
+
+       req->base.complete = rctx->complete;
+
+out:
+       cryptd_skcipher_complete(req, err);
+}
+
+static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
+                                   int err)
+{
+       struct skcipher_request *req = skcipher_request_cast(base);
+       struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *child = ctx->child;
+       SKCIPHER_REQUEST_ON_STACK(subreq, child);
+
+       if (unlikely(err == -EINPROGRESS))
+               goto out;
+
+       skcipher_request_set_tfm(subreq, child);
+       skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+                                     NULL, NULL);
+       skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+                                  req->iv);
+
+       err = crypto_skcipher_decrypt(subreq);
+       skcipher_request_zero(subreq);
+
+       req->base.complete = rctx->complete;
+
+out:
+       cryptd_skcipher_complete(req, err);
+}
+
+static int cryptd_skcipher_enqueue(struct skcipher_request *req,
+                                  crypto_completion_t compl)
+{
+       struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct cryptd_queue *queue;
+
+       queue = cryptd_get_queue(crypto_skcipher_tfm(tfm));
+       rctx->complete = req->base.complete;
+       req->base.complete = compl;
+
+       return cryptd_enqueue_request(queue, &req->base);
+}
+
+static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req)
+{
+       return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt);
+}
+
+static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req)
+{
+       return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt);
+}
+
+static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
+{
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst);
+       struct crypto_skcipher_spawn *spawn = &ictx->spawn;
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *cipher;
+
+       cipher = crypto_spawn_skcipher(spawn);
+       if (IS_ERR(cipher))
+               return PTR_ERR(cipher);
+
+       ctx->child = cipher;
+       crypto_skcipher_set_reqsize(
+               tfm, sizeof(struct cryptd_skcipher_request_ctx));
+       return 0;
+}
+
+static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
+{
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       crypto_free_skcipher(ctx->child);
+}
+
+static void cryptd_skcipher_free(struct skcipher_instance *inst)
+{
+       struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);
+
+       crypto_drop_skcipher(&ctx->spawn);
+}
+
+static int cryptd_create_skcipher(struct crypto_template *tmpl,
+                                 struct rtattr **tb,
+                                 struct cryptd_queue *queue)
+{
+       struct skcipherd_instance_ctx *ctx;
+       struct skcipher_instance *inst;
+       struct skcipher_alg *alg;
+       const char *name;
+       u32 type;
+       u32 mask;
+       int err;
+
+       type = 0;
+       mask = CRYPTO_ALG_ASYNC;
+
+       cryptd_check_internal(tb, &type, &mask);
+
+       name = crypto_attr_alg_name(tb[1]);
+       if (IS_ERR(name))
+               return PTR_ERR(name);
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+       if (!inst)
+               return -ENOMEM;
+
+       ctx = skcipher_instance_ctx(inst);
+       ctx->queue = queue;
+
+       crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
+       err = crypto_grab_skcipher(&ctx->spawn, name, type, mask);
+       if (err)
+               goto out_free_inst;
+
+       alg = crypto_spawn_skcipher_alg(&ctx->spawn);
+       err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base);
+       if (err)
+               goto out_drop_skcipher;
+
+       inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
+                                  (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
+
+       inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+       inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
+       inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+       inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+       inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx);
+
+       inst->alg.init = cryptd_skcipher_init_tfm;
+       inst->alg.exit = cryptd_skcipher_exit_tfm;
+
+       inst->alg.setkey = cryptd_skcipher_setkey;
+       inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue;
+       inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue;
+
+       inst->free = cryptd_skcipher_free;
+
+       err = skcipher_register_instance(tmpl, inst);
+       if (err) {
+out_drop_skcipher:
+               crypto_drop_skcipher(&ctx->spawn);
+out_free_inst:
+               kfree(inst);
+       }
+       return err;
+}
+
 static int cryptd_hash_init_tfm(struct crypto_tfm *tfm)
 {
        struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
@@ -895,7 +1117,11 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
 
        switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
        case CRYPTO_ALG_TYPE_BLKCIPHER:
-               return cryptd_create_blkcipher(tmpl, tb, &queue);
+               if ((algt->type & CRYPTO_ALG_TYPE_MASK) ==
+                   CRYPTO_ALG_TYPE_BLKCIPHER)
+                       return cryptd_create_blkcipher(tmpl, tb, &queue);
+
+               return cryptd_create_skcipher(tmpl, tb, &queue);
        case CRYPTO_ALG_TYPE_DIGEST:
                return cryptd_create_hash(tmpl, tb, &queue);
        case CRYPTO_ALG_TYPE_AEAD:
@@ -985,6 +1211,58 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm)
 }
 EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher);
 
+struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
+                                             u32 type, u32 mask)
+{
+       char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
+       struct cryptd_skcipher_ctx *ctx;
+       struct crypto_skcipher *tfm;
+
+       if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
+                    "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
+               return ERR_PTR(-EINVAL);
+
+       tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask);
+       if (IS_ERR(tfm))
+               return ERR_CAST(tfm);
+
+       if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
+               crypto_free_skcipher(tfm);
+               return ERR_PTR(-EINVAL);
+       }
+
+       ctx = crypto_skcipher_ctx(tfm);
+       atomic_set(&ctx->refcnt, 1);
+
+       return container_of(tfm, struct cryptd_skcipher, base);
+}
+EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher);
+
+struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
+{
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+       return ctx->child;
+}
+EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
+
+bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm)
+{
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+       return atomic_read(&ctx->refcnt) - 1;
+}
+EXPORT_SYMBOL_GPL(cryptd_skcipher_queued);
+
+void cryptd_free_skcipher(struct cryptd_skcipher *tfm)
+{
+       struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+       if (atomic_dec_and_test(&ctx->refcnt))
+               crypto_free_skcipher(&tfm->base);
+}
+EXPORT_SYMBOL_GPL(cryptd_free_skcipher);
+
 struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name,
                                        u32 type, u32 mask)
 {
index 6989ba0..f1bf341 100644 (file)
@@ -47,7 +47,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
        /* If another context is idling then defer */
        if (engine->idling) {
-               kthread_queue_work(&engine->kworker, &engine->pump_requests);
+               kthread_queue_work(engine->kworker, &engine->pump_requests);
                goto out;
        }
 
@@ -58,7 +58,7 @@ static void crypto_pump_requests(struct crypto_engine *engine,
 
                /* Only do teardown in the thread */
                if (!in_kthread) {
-                       kthread_queue_work(&engine->kworker,
+                       kthread_queue_work(engine->kworker,
                                           &engine->pump_requests);
                        goto out;
                }
@@ -189,7 +189,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine,
        ret = ablkcipher_enqueue_request(&engine->queue, req);
 
        if (!engine->busy && need_pump)
-               kthread_queue_work(&engine->kworker, &engine->pump_requests);
+               kthread_queue_work(engine->kworker, &engine->pump_requests);
 
        spin_unlock_irqrestore(&engine->queue_lock, flags);
        return ret;
@@ -231,7 +231,7 @@ int crypto_transfer_hash_request(struct crypto_engine *engine,
        ret = ahash_enqueue_request(&engine->queue, req);
 
        if (!engine->busy && need_pump)
-               kthread_queue_work(&engine->kworker, &engine->pump_requests);
+               kthread_queue_work(engine->kworker, &engine->pump_requests);
 
        spin_unlock_irqrestore(&engine->queue_lock, flags);
        return ret;
@@ -284,7 +284,7 @@ void crypto_finalize_cipher_request(struct crypto_engine *engine,
 
        req->base.complete(&req->base, err);
 
-       kthread_queue_work(&engine->kworker, &engine->pump_requests);
+       kthread_queue_work(engine->kworker, &engine->pump_requests);
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request);
 
@@ -321,7 +321,7 @@ void crypto_finalize_hash_request(struct crypto_engine *engine,
 
        req->base.complete(&req->base, err);
 
-       kthread_queue_work(&engine->kworker, &engine->pump_requests);
+       kthread_queue_work(engine->kworker, &engine->pump_requests);
 }
 EXPORT_SYMBOL_GPL(crypto_finalize_hash_request);
 
@@ -345,7 +345,7 @@ int crypto_engine_start(struct crypto_engine *engine)
        engine->running = true;
        spin_unlock_irqrestore(&engine->queue_lock, flags);
 
-       kthread_queue_work(&engine->kworker, &engine->pump_requests);
+       kthread_queue_work(engine->kworker, &engine->pump_requests);
 
        return 0;
 }
@@ -422,11 +422,8 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
        crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN);
        spin_lock_init(&engine->queue_lock);
 
-       kthread_init_worker(&engine->kworker);
-       engine->kworker_task = kthread_run(kthread_worker_fn,
-                                          &engine->kworker, "%s",
-                                          engine->name);
-       if (IS_ERR(engine->kworker_task)) {
+       engine->kworker = kthread_create_worker(0, "%s", engine->name);
+       if (IS_ERR(engine->kworker)) {
                dev_err(dev, "failed to create crypto request pump task\n");
                return NULL;
        }
@@ -434,7 +431,7 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
 
        if (engine->rt) {
                dev_info(dev, "will run requests pump with realtime priority\n");
-               sched_setscheduler(engine->kworker_task, SCHED_FIFO, &param);
+               sched_setscheduler(engine->kworker->task, SCHED_FIFO, &param);
        }
 
        return engine;
@@ -455,8 +452,7 @@ int crypto_engine_exit(struct crypto_engine *engine)
        if (ret)
                return ret;
 
-       kthread_flush_worker(&engine->kworker);
-       kthread_stop(engine->kworker_task);
+       kthread_destroy_worker(engine->kworker);
 
        return 0;
 }
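
As a standalone illustration of the crypto_engine conversion above (function and variable names here are hypothetical), the dedicated kthread worker API folds creation, queueing and teardown into three calls:

#include <linux/kthread.h>

/* Hypothetical helpers mirroring the new engine lifecycle. */
static struct kthread_worker *example_start_pump(struct kthread_work *work)
{
	struct kthread_worker *worker;

	worker = kthread_create_worker(0, "%s", "crypto-pump");
	if (IS_ERR(worker))
		return worker;			/* caller checks with IS_ERR() */

	kthread_queue_work(worker, work);	/* note: pointer, not &worker */
	return worker;
}

static void example_stop_pump(struct kthread_worker *worker)
{
	/* Replaces the old kthread_flush_worker() + kthread_stop() pair. */
	kthread_destroy_worker(worker);
}

The realtime path keeps working because the backing task remains reachable as worker->task, which is exactly what the sched_setscheduler() change above relies on.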
index 1c57054..a90404a 100644 (file)
@@ -112,6 +112,21 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+       struct crypto_report_acomp racomp;
+
+       strncpy(racomp.type, "acomp", sizeof(racomp.type));
+
+       if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+                   sizeof(struct crypto_report_acomp), &racomp))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+
 static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_akcipher rakcipher;
@@ -186,7 +201,11 @@ static int crypto_report_one(struct crypto_alg *alg,
                        goto nla_put_failure;
 
                break;
+       case CRYPTO_ALG_TYPE_ACOMPRESS:
+               if (crypto_report_acomp(skb, alg))
+                       goto nla_put_failure;
 
+               break;
        case CRYPTO_ALG_TYPE_AKCIPHER:
                if (crypto_report_akcipher(skb, alg))
                        goto nla_put_failure;
index ff4d21e..a9a7a44 100644 (file)
@@ -312,7 +312,7 @@ static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm)
        unsigned long align;
        unsigned int reqsize;
 
-       cipher = crypto_spawn_skcipher2(spawn);
+       cipher = crypto_spawn_skcipher(spawn);
        if (IS_ERR(cipher))
                return PTR_ERR(cipher);
 
@@ -370,9 +370,9 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl,
        spawn = skcipher_instance_ctx(inst);
 
        crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-       err = crypto_grab_skcipher2(spawn, cipher_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(spawn, cipher_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_free_inst;
 
index 5197618..00254d7 100644 (file)
@@ -290,7 +290,7 @@ static int crypto_cts_init_tfm(struct crypto_skcipher *tfm)
        unsigned bsize;
        unsigned align;
 
-       cipher = crypto_spawn_skcipher2(spawn);
+       cipher = crypto_spawn_skcipher(spawn);
        if (IS_ERR(cipher))
                return PTR_ERR(cipher);
 
@@ -348,9 +348,9 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
        spawn = skcipher_instance_ctx(inst);
 
        crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
-       err = crypto_grab_skcipher2(spawn, cipher_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(spawn, cipher_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_free_inst;
 
index 95d8d37..f942cb3 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/interrupt.h>
 #include <linux/mm.h>
 #include <linux/net.h>
+#include <crypto/internal/scompress.h>
 
 #define DEFLATE_DEF_LEVEL              Z_DEFAULT_COMPRESSION
 #define DEFLATE_DEF_WINBITS            11
@@ -101,9 +102,8 @@ static void deflate_decomp_exit(struct deflate_ctx *ctx)
        vfree(ctx->decomp_stream.workspace);
 }
 
-static int deflate_init(struct crypto_tfm *tfm)
+static int __deflate_init(void *ctx)
 {
-       struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
        int ret;
 
        ret = deflate_comp_init(ctx);
@@ -116,19 +116,55 @@ out:
        return ret;
 }
 
-static void deflate_exit(struct crypto_tfm *tfm)
+static void *deflate_alloc_ctx(struct crypto_scomp *tfm)
+{
+       struct deflate_ctx *ctx;
+       int ret;
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       ret = __deflate_init(ctx);
+       if (ret) {
+               kfree(ctx);
+               return ERR_PTR(ret);
+       }
+
+       return ctx;
+}
+
+static int deflate_init(struct crypto_tfm *tfm)
 {
        struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
 
+       return __deflate_init(ctx);
+}
+
+static void __deflate_exit(void *ctx)
+{
        deflate_comp_exit(ctx);
        deflate_decomp_exit(ctx);
 }
 
-static int deflate_compress(struct crypto_tfm *tfm, const u8 *src,
-                           unsigned int slen, u8 *dst, unsigned int *dlen)
+static void deflate_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       __deflate_exit(ctx);
+       kzfree(ctx);
+}
+
+static void deflate_exit(struct crypto_tfm *tfm)
+{
+       struct deflate_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       __deflate_exit(ctx);
+}
+
+static int __deflate_compress(const u8 *src, unsigned int slen,
+                             u8 *dst, unsigned int *dlen, void *ctx)
 {
        int ret = 0;
-       struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+       struct deflate_ctx *dctx = ctx;
        struct z_stream_s *stream = &dctx->comp_stream;
 
        ret = zlib_deflateReset(stream);
@@ -153,12 +189,27 @@ out:
        return ret;
 }
 
-static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src,
-                             unsigned int slen, u8 *dst, unsigned int *dlen)
+static int deflate_compress(struct crypto_tfm *tfm, const u8 *src,
+                           unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+       struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+
+       return __deflate_compress(src, slen, dst, dlen, dctx);
+}
+
+static int deflate_scompress(struct crypto_scomp *tfm, const u8 *src,
+                            unsigned int slen, u8 *dst, unsigned int *dlen,
+                            void *ctx)
+{
+       return __deflate_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __deflate_decompress(const u8 *src, unsigned int slen,
+                               u8 *dst, unsigned int *dlen, void *ctx)
 {
 
        int ret = 0;
-       struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+       struct deflate_ctx *dctx = ctx;
        struct z_stream_s *stream = &dctx->decomp_stream;
 
        ret = zlib_inflateReset(stream);
@@ -194,6 +245,21 @@ out:
        return ret;
 }
 
+static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src,
+                             unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+       struct deflate_ctx *dctx = crypto_tfm_ctx(tfm);
+
+       return __deflate_decompress(src, slen, dst, dlen, dctx);
+}
+
+static int deflate_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+                              unsigned int slen, u8 *dst, unsigned int *dlen,
+                              void *ctx)
+{
+       return __deflate_decompress(src, slen, dst, dlen, ctx);
+}
+
 static struct crypto_alg alg = {
        .cra_name               = "deflate",
        .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
@@ -206,14 +272,39 @@ static struct crypto_alg alg = {
        .coa_decompress         = deflate_decompress } }
 };
 
+static struct scomp_alg scomp = {
+       .alloc_ctx              = deflate_alloc_ctx,
+       .free_ctx               = deflate_free_ctx,
+       .compress               = deflate_scompress,
+       .decompress             = deflate_sdecompress,
+       .base                   = {
+               .cra_name       = "deflate",
+               .cra_driver_name = "deflate-scomp",
+               .cra_module      = THIS_MODULE,
+       }
+};
+
 static int __init deflate_mod_init(void)
 {
-       return crypto_register_alg(&alg);
+       int ret;
+
+       ret = crypto_register_alg(&alg);
+       if (ret)
+               return ret;
+
+       ret = crypto_register_scomp(&scomp);
+       if (ret) {
+               crypto_unregister_alg(&alg);
+               return ret;
+       }
+
+       return ret;
 }
 
 static void __exit deflate_mod_fini(void)
 {
        crypto_unregister_alg(&alg);
+       crypto_unregister_scomp(&scomp);
 }
 
 module_init(deflate_mod_init);
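
To make the new split explicit, here is a sketch that exercises the scomp callbacks defined above (they are static to this file, so this is illustrative only): the scomp path carries its working memory in an explicit ctx handed to every call, instead of hiding it inside the tfm.

/* Illustrative round trip through the static scomp callbacks above. */
static int example_scomp_compress(const u8 *in, unsigned int inlen,
				  u8 *out, unsigned int outlen)
{
	void *ctx;
	int err;

	ctx = deflate_alloc_ctx(NULL);		/* tfm argument is unused */
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	err = deflate_scompress(NULL, in, inlen, out, &outlen, ctx);

	deflate_free_ctx(NULL, ctx);		/* __deflate_exit() + kzfree() */
	return err;
}

The same alloc_ctx/free_ctx pairing recurs in the lz4, lz4hc and lzo conversions later in this series.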
index 9d19360..ddcb528 100644 (file)
@@ -118,7 +118,7 @@ static int dh_compute_value(struct kpp_request *req)
        if (req->src) {
                base = mpi_read_raw_from_sgl(req->src, req->src_len);
                if (!base) {
-                       ret = EINVAL;
+                       ret = -EINVAL;
                        goto err_free_val;
                }
        } else {
index 053035b..8a4d98b 100644 (file)
@@ -1782,6 +1782,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
                memcpy(outbuf, drbg->outscratchpad, cryptlen);
 
                outlen -= cryptlen;
+               outbuf += cryptlen;
        }
        ret = 0;
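
The one-line DRBG change above is easier to see as the generic pattern it restores; the sketch below is not the driver's code (the scratch buffer size and the use of get_random_bytes() as a stand-in generator are assumptions):

#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/string.h>

/* Hypothetical helper: fill outbuf in scratchpad-sized chunks. */
static void example_fill_in_chunks(u8 *outbuf, size_t outlen)
{
	u8 scratch[64];			/* stand-in for drbg->outscratchpad */

	while (outlen) {
		size_t n = min_t(size_t, outlen, sizeof(scratch));

		get_random_bytes(scratch, n);	/* stand-in chunk generator */
		memcpy(outbuf, scratch, n);
		outlen -= n;
		outbuf += n;	/* the advance the patch adds; without it every
				 * chunk overwrote the start of the buffer */
	}
}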
 
index f624ac9..b7ad808 100644 (file)
@@ -575,7 +575,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm)
        if (IS_ERR(ghash))
                return PTR_ERR(ghash);
 
-       ctr = crypto_spawn_skcipher2(&ictx->ctr);
+       ctr = crypto_spawn_skcipher(&ictx->ctr);
        err = PTR_ERR(ctr);
        if (IS_ERR(ctr))
                goto err_free_hash;
@@ -663,20 +663,20 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
                goto err_drop_ghash;
 
        crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst));
-       err = crypto_grab_skcipher2(&ctx->ctr, ctr_name, 0,
-                                   crypto_requires_sync(algt->type,
-                                                        algt->mask));
+       err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
        if (err)
                goto err_drop_ghash;
 
        ctr = crypto_spawn_skcipher_alg(&ctx->ctr);
 
        /* We only support 16-byte blocks. */
+       err = -EINVAL;
        if (crypto_skcipher_alg_ivsize(ctr) != 16)
                goto out_put_ctr;
 
        /* Not a stream cipher? */
-       err = -EINVAL;
        if (ctr->base.cra_blocksize != 1)
                goto out_put_ctr;
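
The reordering above matters because, after a successful crypto_grab_skcipher(), err would still hold 0; without priming it with -EINVAL first, a CTR algorithm with the wrong IV size would jump to out_put_ctr and be reported as success. A self-contained illustration of the pattern (hypothetical function, real accessors from <crypto/internal/skcipher.h>):

/* Generic shape of the fix: prime err before the first goto that uses it. */
static int example_validate_ctr(struct skcipher_alg *ctr)
{
	int err;

	err = -EINVAL;
	if (crypto_skcipher_alg_ivsize(ctr) != 16)	/* need a 16-byte IV */
		goto out;
	if (ctr->base.cra_blocksize != 1)		/* must be a stream cipher */
		goto out;
	err = 0;
out:
	return err;
}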
 
index 5276607..72015fe 100644 (file)
@@ -263,48 +263,6 @@ EXPORT_SYMBOL(gf128mul_bbe);
  * t[1][BYTE] contains g*x^8*BYTE
  *  ..
  * t[15][BYTE] contains g*x^120*BYTE */
-struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
-{
-       struct gf128mul_64k *t;
-       int i, j, k;
-
-       t = kzalloc(sizeof(*t), GFP_KERNEL);
-       if (!t)
-               goto out;
-
-       for (i = 0; i < 16; i++) {
-               t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
-               if (!t->t[i]) {
-                       gf128mul_free_64k(t);
-                       t = NULL;
-                       goto out;
-               }
-       }
-
-       t->t[0]->t[128] = *g;
-       for (j = 64; j > 0; j >>= 1)
-               gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
-
-       for (i = 0;;) {
-               for (j = 2; j < 256; j += j)
-                       for (k = 1; k < j; ++k)
-                               be128_xor(&t->t[i]->t[j + k],
-                                         &t->t[i]->t[j], &t->t[i]->t[k]);
-
-               if (++i >= 16)
-                       break;
-
-               for (j = 128; j > 0; j >>= 1) {
-                       t->t[i]->t[j] = t->t[i - 1]->t[j];
-                       gf128mul_x8_lle(&t->t[i]->t[j]);
-               }
-       }
-
-out:
-       return t;
-}
-EXPORT_SYMBOL(gf128mul_init_64k_lle);
-
 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
 {
        struct gf128mul_64k *t;
@@ -352,24 +310,11 @@ void gf128mul_free_64k(struct gf128mul_64k *t)
        int i;
 
        for (i = 0; i < 16; i++)
-               kfree(t->t[i]);
-       kfree(t);
+               kzfree(t->t[i]);
+       kzfree(t);
 }
 EXPORT_SYMBOL(gf128mul_free_64k);
 
-void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
-{
-       u8 *ap = (u8 *)a;
-       be128 r[1];
-       int i;
-
-       *r = t->t[0]->t[ap[0]];
-       for (i = 1; i < 16; ++i)
-               be128_xor(r, r, &t->t[i]->t[ap[i]]);
-       *a = *r;
-}
-EXPORT_SYMBOL(gf128mul_64k_lle);
-
 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
 {
        u8 *ap = (u8 *)a;
index 7eefcdb..f073204 100644 (file)
@@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 int crypto_init_cipher_ops(struct crypto_tfm *tfm);
 int crypto_init_compress_ops(struct crypto_tfm *tfm);
 
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
-void crypto_exit_compress_ops(struct crypto_tfm *tfm);
-
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
 struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
index c493849..787dccc 100644 (file)
@@ -39,7 +39,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/module.h>
 #include <linux/fips.h>
 #include <linux/time.h>
 #include <linux/crypto.h>
index 6f9908a..ecd8474 100644 (file)
@@ -17,7 +17,8 @@
  *
  * The test vectors are included in the testing module tcrypt.[ch] */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <crypto/gf128mul.h>
 #include <crypto/lrw.h>
 
+#define LRW_BUFFER_SIZE 128u
+
 struct priv {
-       struct crypto_cipher *child;
+       struct crypto_skcipher *child;
        struct lrw_table_ctx table;
 };
 
+struct rctx {
+       be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
+
+       be128 t;
+
+       be128 *ext;
+
+       struct scatterlist srcbuf[2];
+       struct scatterlist dstbuf[2];
+       struct scatterlist *src;
+       struct scatterlist *dst;
+
+       unsigned int left;
+
+       struct skcipher_request subreq;
+};
+
 static inline void setbit128_bbe(void *b, int bit)
 {
        __set_bit(bit ^ (0x80 -
@@ -76,32 +96,26 @@ void lrw_free_table(struct lrw_table_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(lrw_free_table);
 
-static int setkey(struct crypto_tfm *parent, const u8 *key,
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
                  unsigned int keylen)
 {
-       struct priv *ctx = crypto_tfm_ctx(parent);
-       struct crypto_cipher *child = ctx->child;
+       struct priv *ctx = crypto_skcipher_ctx(parent);
+       struct crypto_skcipher *child = ctx->child;
        int err, bsize = LRW_BLOCK_SIZE;
        const u8 *tweak = key + keylen - bsize;
 
-       crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-       crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-                                      CRYPTO_TFM_REQ_MASK);
-       err = crypto_cipher_setkey(child, key, keylen - bsize);
+       crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+                                        CRYPTO_TFM_REQ_MASK);
+       err = crypto_skcipher_setkey(child, key, keylen - bsize);
+       crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+                                         CRYPTO_TFM_RES_MASK);
        if (err)
                return err;
-       crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-                                    CRYPTO_TFM_RES_MASK);
 
        return lrw_init_table(&ctx->table, tweak);
 }
 
-struct sinfo {
-       be128 t;
-       struct crypto_tfm *tfm;
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *);
-};
-
 static inline void inc(be128 *iv)
 {
        be64_add_cpu(&iv->b, 1);
@@ -109,13 +123,6 @@ static inline void inc(be128 *iv)
                be64_add_cpu(&iv->a, 1);
 }
 
-static inline void lrw_round(struct sinfo *s, void *dst, const void *src)
-{
-       be128_xor(dst, &s->t, src);             /* PP <- T xor P */
-       s->fn(s->tfm, dst, dst);                /* CC <- E(Key2,PP) */
-       be128_xor(dst, dst, &s->t);             /* C <- T xor CC */
-}
-
 /* this returns the number of consecutive 1 bits starting
  * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
 static inline int get_index128(be128 *block)
@@ -135,83 +142,263 @@ static inline int get_index128(be128 *block)
        return x;
 }
 
-static int crypt(struct blkcipher_desc *d,
-                struct blkcipher_walk *w, struct priv *ctx,
-                void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+static int post_crypt(struct skcipher_request *req)
 {
+       struct rctx *rctx = skcipher_request_ctx(req);
+       be128 *buf = rctx->ext ?: rctx->buf;
+       struct skcipher_request *subreq;
+       const int bs = LRW_BLOCK_SIZE;
+       struct skcipher_walk w;
+       struct scatterlist *sg;
+       unsigned offset;
        int err;
-       unsigned int avail;
+
+       subreq = &rctx->subreq;
+       err = skcipher_walk_virt(&w, subreq, false);
+
+       while (w.nbytes) {
+               unsigned int avail = w.nbytes;
+               be128 *wdst;
+
+               wdst = w.dst.virt.addr;
+
+               do {
+                       be128_xor(wdst, buf++, wdst);
+                       wdst++;
+               } while ((avail -= bs) >= bs);
+
+               err = skcipher_walk_done(&w, avail);
+       }
+
+       rctx->left -= subreq->cryptlen;
+
+       if (err || !rctx->left)
+               goto out;
+
+       rctx->dst = rctx->dstbuf;
+
+       scatterwalk_done(&w.out, 0, 1);
+       sg = w.out.sg;
+       offset = w.out.offset;
+
+       if (rctx->dst != sg) {
+               rctx->dst[0] = *sg;
+               sg_unmark_end(rctx->dst);
+               scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+       }
+       rctx->dst[0].length -= offset - sg->offset;
+       rctx->dst[0].offset = offset;
+
+out:
+       return err;
+}
+
+static int pre_crypt(struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       be128 *buf = rctx->ext ?: rctx->buf;
+       struct skcipher_request *subreq;
        const int bs = LRW_BLOCK_SIZE;
-       struct sinfo s = {
-               .tfm = crypto_cipher_tfm(ctx->child),
-               .fn = fn
-       };
+       struct skcipher_walk w;
+       struct scatterlist *sg;
+       unsigned cryptlen;
+       unsigned offset;
        be128 *iv;
-       u8 *wsrc;
-       u8 *wdst;
+       bool more;
+       int err;
 
-       err = blkcipher_walk_virt(d, w);
-       if (!(avail = w->nbytes))
-               return err;
+       subreq = &rctx->subreq;
+       skcipher_request_set_tfm(subreq, tfm);
 
-       wsrc = w->src.virt.addr;
-       wdst = w->dst.virt.addr;
+       cryptlen = subreq->cryptlen;
+       more = rctx->left > cryptlen;
+       if (!more)
+               cryptlen = rctx->left;
 
-       /* calculate first value of T */
-       iv = (be128 *)w->iv;
-       s.t = *iv;
+       skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
+                                  cryptlen, req->iv);
 
-       /* T <- I*Key2 */
-       gf128mul_64k_bbe(&s.t, ctx->table.table);
+       err = skcipher_walk_virt(&w, subreq, false);
+       iv = w.iv;
 
-       goto first;
+       while (w.nbytes) {
+               unsigned int avail = w.nbytes;
+               be128 *wsrc;
+               be128 *wdst;
+
+               wsrc = w.src.virt.addr;
+               wdst = w.dst.virt.addr;
 
-       for (;;) {
                do {
+                       *buf++ = rctx->t;
+                       be128_xor(wdst++, &rctx->t, wsrc++);
+
                        /* T <- I*Key2, using the optimization
                         * discussed in the specification */
-                       be128_xor(&s.t, &s.t,
+                       be128_xor(&rctx->t, &rctx->t,
                                  &ctx->table.mulinc[get_index128(iv)]);
                        inc(iv);
+               } while ((avail -= bs) >= bs);
 
-first:
-                       lrw_round(&s, wdst, wsrc);
+               err = skcipher_walk_done(&w, avail);
+       }
 
-                       wsrc += bs;
-                       wdst += bs;
-               } while ((avail -= bs) >= bs);
+       skcipher_request_set_tfm(subreq, ctx->child);
+       skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
+                                  cryptlen, NULL);
 
-               err = blkcipher_walk_done(d, w, avail);
-               if (!(avail = w->nbytes))
-                       break;
+       if (err || !more)
+               goto out;
+
+       rctx->src = rctx->srcbuf;
+
+       scatterwalk_done(&w.in, 0, 1);
+       sg = w.in.sg;
+       offset = w.in.offset;
+
+       if (rctx->src != sg) {
+               rctx->src[0] = *sg;
+               sg_unmark_end(rctx->src);
+               scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
+       }
+       rctx->src[0].length -= offset - sg->offset;
+       rctx->src[0].offset = offset;
+
+out:
+       return err;
+}
+
+static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+{
+       struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct skcipher_request *subreq;
+       gfp_t gfp;
+
+       subreq = &rctx->subreq;
+       skcipher_request_set_callback(subreq, req->base.flags, done, req);
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+                                                          GFP_ATOMIC;
+       rctx->ext = NULL;
+
+       subreq->cryptlen = LRW_BUFFER_SIZE;
+       if (req->cryptlen > LRW_BUFFER_SIZE) {
+               subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
+               rctx->ext = kmalloc(subreq->cryptlen, gfp);
+       }
+
+       rctx->src = req->src;
+       rctx->dst = req->dst;
+       rctx->left = req->cryptlen;
+
+       /* calculate first value of T */
+       memcpy(&rctx->t, req->iv, sizeof(rctx->t));
+
+       /* T <- I*Key2 */
+       gf128mul_64k_bbe(&rctx->t, ctx->table.table);
 
-               wsrc = w->src.virt.addr;
-               wdst = w->dst.virt.addr;
+       return 0;
+}
+
+static void exit_crypt(struct skcipher_request *req)
+{
+       struct rctx *rctx = skcipher_request_ctx(req);
+
+       rctx->left = 0;
+
+       if (rctx->ext)
+               kfree(rctx->ext);
+}
+
+static int do_encrypt(struct skcipher_request *req, int err)
+{
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct skcipher_request *subreq;
+
+       subreq = &rctx->subreq;
+
+       while (!err && rctx->left) {
+               err = pre_crypt(req) ?:
+                     crypto_skcipher_encrypt(subreq) ?:
+                     post_crypt(req);
+
+               if (err == -EINPROGRESS ||
+                   (err == -EBUSY &&
+                    req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return err;
        }
 
+       exit_crypt(req);
        return err;
 }
 
-static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                  struct scatterlist *src, unsigned int nbytes)
+static void encrypt_done(struct crypto_async_request *areq, int err)
+{
+       struct skcipher_request *req = areq->data;
+       struct skcipher_request *subreq;
+       struct rctx *rctx;
+
+       rctx = skcipher_request_ctx(req);
+       subreq = &rctx->subreq;
+       subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+       err = do_encrypt(req, err ?: post_crypt(req));
+       if (rctx->left)
+               return;
+
+       skcipher_request_complete(req, err);
+}
+
+static int encrypt(struct skcipher_request *req)
+{
+       return do_encrypt(req, init_crypt(req, encrypt_done));
+}
+
+static int do_decrypt(struct skcipher_request *req, int err)
 {
-       struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk w;
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct skcipher_request *subreq;
+
+       subreq = &rctx->subreq;
+
+       while (!err && rctx->left) {
+               err = pre_crypt(req) ?:
+                     crypto_skcipher_decrypt(subreq) ?:
+                     post_crypt(req);
+
+               if (err == -EINPROGRESS ||
+                   (err == -EBUSY &&
+                    req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return err;
+       }
 
-       blkcipher_walk_init(&w, dst, src, nbytes);
-       return crypt(desc, &w, ctx,
-                    crypto_cipher_alg(ctx->child)->cia_encrypt);
+       exit_crypt(req);
+       return err;
 }
 
-static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                  struct scatterlist *src, unsigned int nbytes)
+static void decrypt_done(struct crypto_async_request *areq, int err)
 {
-       struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk w;
+       struct skcipher_request *req = areq->data;
+       struct skcipher_request *subreq;
+       struct rctx *rctx;
+
+       rctx = skcipher_request_ctx(req);
+       subreq = &rctx->subreq;
+       subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+       err = do_decrypt(req, err ?: post_crypt(req));
+       if (rctx->left)
+               return;
 
-       blkcipher_walk_init(&w, dst, src, nbytes);
-       return crypt(desc, &w, ctx,
-                    crypto_cipher_alg(ctx->child)->cia_decrypt);
+       skcipher_request_complete(req, err);
+}
+
+static int decrypt(struct skcipher_request *req)
+{
+       return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
 int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
@@ -293,95 +480,161 @@ first:
 }
 EXPORT_SYMBOL_GPL(lrw_crypt);
 
-static int init_tfm(struct crypto_tfm *tfm)
+static int init_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_cipher *cipher;
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-       struct priv *ctx = crypto_tfm_ctx(tfm);
-       u32 *flags = &tfm->crt_flags;
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst);
+       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *cipher;
 
-       cipher = crypto_spawn_cipher(spawn);
+       cipher = crypto_spawn_skcipher(spawn);
        if (IS_ERR(cipher))
                return PTR_ERR(cipher);
 
-       if (crypto_cipher_blocksize(cipher) != LRW_BLOCK_SIZE) {
-               *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-               crypto_free_cipher(cipher);
-               return -EINVAL;
-       }
-
        ctx->child = cipher;
+
+       crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) +
+                                        sizeof(struct rctx));
+
        return 0;
 }
 
-static void exit_tfm(struct crypto_tfm *tfm)
+static void exit_tfm(struct crypto_skcipher *tfm)
 {
-       struct priv *ctx = crypto_tfm_ctx(tfm);
+       struct priv *ctx = crypto_skcipher_ctx(tfm);
 
        lrw_free_table(&ctx->table);
-       crypto_free_cipher(ctx->child);
+       crypto_free_skcipher(ctx->child);
+}
+
+static void free(struct skcipher_instance *inst)
+{
+       crypto_drop_skcipher(skcipher_instance_ctx(inst));
+       kfree(inst);
 }
 
-static struct crypto_instance *alloc(struct rtattr **tb)
+static int create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_instance *inst;
-       struct crypto_alg *alg;
+       struct crypto_skcipher_spawn *spawn;
+       struct skcipher_instance *inst;
+       struct crypto_attr_type *algt;
+       struct skcipher_alg *alg;
+       const char *cipher_name;
+       char ecb_name[CRYPTO_MAX_ALG_NAME];
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+       algt = crypto_get_attr_type(tb);
+       if (IS_ERR(algt))
+               return PTR_ERR(algt);
+
+       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+               return -EINVAL;
+
+       cipher_name = crypto_attr_alg_name(tb[1]);
+       if (IS_ERR(cipher_name))
+               return PTR_ERR(cipher_name);
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+       if (!inst)
+               return -ENOMEM;
+
+       spawn = skcipher_instance_ctx(inst);
+
+       crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+       err = crypto_grab_skcipher(spawn, cipher_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
+       if (err == -ENOENT) {
+               err = -ENAMETOOLONG;
+               if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
+                            cipher_name) >= CRYPTO_MAX_ALG_NAME)
+                       goto err_free_inst;
+
+               err = crypto_grab_skcipher(spawn, ecb_name, 0,
+                                          crypto_requires_sync(algt->type,
+                                                               algt->mask));
+       }
+
        if (err)
-               return ERR_PTR(err);
+               goto err_free_inst;
 
-       alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-                                 CRYPTO_ALG_TYPE_MASK);
-       if (IS_ERR(alg))
-               return ERR_CAST(alg);
+       alg = crypto_skcipher_spawn_alg(spawn);
 
-       inst = crypto_alloc_instance("lrw", alg);
-       if (IS_ERR(inst))
-               goto out_put_alg;
+       err = -EINVAL;
+       if (alg->base.cra_blocksize != LRW_BLOCK_SIZE)
+               goto err_drop_spawn;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-       inst->alg.cra_priority = alg->cra_priority;
-       inst->alg.cra_blocksize = alg->cra_blocksize;
+       if (crypto_skcipher_alg_ivsize(alg))
+               goto err_drop_spawn;
 
-       if (alg->cra_alignmask < 7) inst->alg.cra_alignmask = 7;
-       else inst->alg.cra_alignmask = alg->cra_alignmask;
-       inst->alg.cra_type = &crypto_blkcipher_type;
+       err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw",
+                                 &alg->base);
+       if (err)
+               goto err_drop_spawn;
 
-       if (!(alg->cra_blocksize % 4))
-               inst->alg.cra_alignmask |= 3;
-       inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-       inst->alg.cra_blkcipher.min_keysize =
-               alg->cra_cipher.cia_min_keysize + alg->cra_blocksize;
-       inst->alg.cra_blkcipher.max_keysize =
-               alg->cra_cipher.cia_max_keysize + alg->cra_blocksize;
+       err = -EINVAL;
+       cipher_name = alg->base.cra_name;
 
-       inst->alg.cra_ctxsize = sizeof(struct priv);
+       /* Alas we screwed up the naming so we have to mangle the
+        * cipher name.
+        */
+       if (!strncmp(cipher_name, "ecb(", 4)) {
+               unsigned len;
 
-       inst->alg.cra_init = init_tfm;
-       inst->alg.cra_exit = exit_tfm;
+               len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
+               if (len < 2 || len >= sizeof(ecb_name))
+                       goto err_drop_spawn;
 
-       inst->alg.cra_blkcipher.setkey = setkey;
-       inst->alg.cra_blkcipher.encrypt = encrypt;
-       inst->alg.cra_blkcipher.decrypt = decrypt;
+               if (ecb_name[len - 1] != ')')
+                       goto err_drop_spawn;
 
-out_put_alg:
-       crypto_mod_put(alg);
-       return inst;
-}
+               ecb_name[len - 1] = 0;
 
-static void free(struct crypto_instance *inst)
-{
-       crypto_drop_spawn(crypto_instance_ctx(inst));
+               if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+                            "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME)
+                       return -ENAMETOOLONG;
+       }
+
+       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_priority = alg->base.cra_priority;
+       inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
+       inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
+                                      (__alignof__(u64) - 1);
+
+       inst->alg.ivsize = LRW_BLOCK_SIZE;
+       inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
+                               LRW_BLOCK_SIZE;
+       inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) +
+                               LRW_BLOCK_SIZE;
+
+       inst->alg.base.cra_ctxsize = sizeof(struct priv);
+
+       inst->alg.init = init_tfm;
+       inst->alg.exit = exit_tfm;
+
+       inst->alg.setkey = setkey;
+       inst->alg.encrypt = encrypt;
+       inst->alg.decrypt = decrypt;
+
+       inst->free = free;
+
+       err = skcipher_register_instance(tmpl, inst);
+       if (err)
+               goto err_drop_spawn;
+
+out:
+       return err;
+
+err_drop_spawn:
+       crypto_drop_skcipher(spawn);
+err_free_inst:
        kfree(inst);
+       goto out;
 }
 
 static struct crypto_template crypto_tmpl = {
        .name = "lrw",
-       .alloc = alloc,
-       .free = free,
+       .create = create,
        .module = THIS_MODULE,
 };
 
index aefbcea..99c1b2c 100644 (file)
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
 #include <linux/lz4.h>
+#include <crypto/internal/scompress.h>
 
 struct lz4_ctx {
        void *lz4_comp_mem;
 };
 
+static void *lz4_alloc_ctx(struct crypto_scomp *tfm)
+{
+       void *ctx;
+
+       ctx = vmalloc(LZ4_MEM_COMPRESS);
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       return ctx;
+}
+
 static int lz4_init(struct crypto_tfm *tfm)
 {
        struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS);
-       if (!ctx->lz4_comp_mem)
+       ctx->lz4_comp_mem = lz4_alloc_ctx(NULL);
+       if (IS_ERR(ctx->lz4_comp_mem))
                return -ENOMEM;
 
        return 0;
 }
 
+static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       vfree(ctx);
+}
+
 static void lz4_exit(struct crypto_tfm *tfm)
 {
        struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
-       vfree(ctx->lz4_comp_mem);
+
+       lz4_free_ctx(NULL, ctx->lz4_comp_mem);
 }
 
-static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-                           unsigned int slen, u8 *dst, unsigned int *dlen)
+static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
+                                u8 *dst, unsigned int *dlen, void *ctx)
 {
-       struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
        size_t tmp_len = *dlen;
        int err;
 
-       err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem);
+       err = lz4_compress(src, slen, dst, &tmp_len, ctx);
 
        if (err < 0)
                return -EINVAL;
@@ -61,8 +78,23 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
        return 0;
 }
 
-static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-                             unsigned int slen, u8 *dst, unsigned int *dlen)
+static int lz4_scompress(struct crypto_scomp *tfm, const u8 *src,
+                        unsigned int slen, u8 *dst, unsigned int *dlen,
+                        void *ctx)
+{
+       return __lz4_compress_crypto(src, slen, dst, dlen, ctx);
+}
+
+static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                              unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+       struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       return __lz4_compress_crypto(src, slen, dst, dlen, ctx->lz4_comp_mem);
+}
+
+static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
+                                  u8 *dst, unsigned int *dlen, void *ctx)
 {
        int err;
        size_t tmp_len = *dlen;
@@ -76,6 +108,20 @@ static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
        return err;
 }
 
+static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+                          unsigned int slen, u8 *dst, unsigned int *dlen,
+                          void *ctx)
+{
+       return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                                unsigned int slen, u8 *dst,
+                                unsigned int *dlen)
+{
+       return __lz4_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
 static struct crypto_alg alg_lz4 = {
        .cra_name               = "lz4",
        .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
@@ -89,14 +135,39 @@ static struct crypto_alg alg_lz4 = {
        .coa_decompress         = lz4_decompress_crypto } }
 };
 
+static struct scomp_alg scomp = {
+       .alloc_ctx              = lz4_alloc_ctx,
+       .free_ctx               = lz4_free_ctx,
+       .compress               = lz4_scompress,
+       .decompress             = lz4_sdecompress,
+       .base                   = {
+               .cra_name       = "lz4",
+               .cra_driver_name = "lz4-scomp",
+               .cra_module      = THIS_MODULE,
+       }
+};
+
 static int __init lz4_mod_init(void)
 {
-       return crypto_register_alg(&alg_lz4);
+       int ret;
+
+       ret = crypto_register_alg(&alg_lz4);
+       if (ret)
+               return ret;
+
+       ret = crypto_register_scomp(&scomp);
+       if (ret) {
+               crypto_unregister_alg(&alg_lz4);
+               return ret;
+       }
+
+       return ret;
 }
 
 static void __exit lz4_mod_fini(void)
 {
        crypto_unregister_alg(&alg_lz4);
+       crypto_unregister_scomp(&scomp);
 }
 
 module_init(lz4_mod_init);
index a1d3b5b..75ffc4a 100644 (file)
 #include <linux/crypto.h>
 #include <linux/vmalloc.h>
 #include <linux/lz4.h>
+#include <crypto/internal/scompress.h>
 
 struct lz4hc_ctx {
        void *lz4hc_comp_mem;
 };
 
+static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm)
+{
+       void *ctx;
+
+       ctx = vmalloc(LZ4HC_MEM_COMPRESS);
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       return ctx;
+}
+
 static int lz4hc_init(struct crypto_tfm *tfm)
 {
        struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS);
-       if (!ctx->lz4hc_comp_mem)
+       ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL);
+       if (IS_ERR(ctx->lz4hc_comp_mem))
                return -ENOMEM;
 
        return 0;
 }
 
+static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       vfree(ctx);
+}
+
 static void lz4hc_exit(struct crypto_tfm *tfm)
 {
        struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       vfree(ctx->lz4hc_comp_mem);
+       lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem);
 }
 
-static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
-                           unsigned int slen, u8 *dst, unsigned int *dlen)
+static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
+                                  u8 *dst, unsigned int *dlen, void *ctx)
 {
-       struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
        size_t tmp_len = *dlen;
        int err;
 
-       err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem);
+       err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
 
        if (err < 0)
                return -EINVAL;
@@ -61,8 +77,25 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
        return 0;
 }
 
-static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
-                             unsigned int slen, u8 *dst, unsigned int *dlen)
+static int lz4hc_scompress(struct crypto_scomp *tfm, const u8 *src,
+                          unsigned int slen, u8 *dst, unsigned int *dlen,
+                          void *ctx)
+{
+       return __lz4hc_compress_crypto(src, slen, dst, dlen, ctx);
+}
+
+static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                                unsigned int slen, u8 *dst,
+                                unsigned int *dlen)
+{
+       struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       return __lz4hc_compress_crypto(src, slen, dst, dlen,
+                                       ctx->lz4hc_comp_mem);
+}
+
+static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
+                                    u8 *dst, unsigned int *dlen, void *ctx)
 {
        int err;
        size_t tmp_len = *dlen;
@@ -76,6 +109,20 @@ static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
        return err;
 }
 
+static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+                            unsigned int slen, u8 *dst, unsigned int *dlen,
+                            void *ctx)
+{
+       return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
+static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+                                  unsigned int slen, u8 *dst,
+                                  unsigned int *dlen)
+{
+       return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL);
+}
+
 static struct crypto_alg alg_lz4hc = {
        .cra_name               = "lz4hc",
        .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
@@ -89,14 +136,39 @@ static struct crypto_alg alg_lz4hc = {
        .coa_decompress         = lz4hc_decompress_crypto } }
 };
 
+static struct scomp_alg scomp = {
+       .alloc_ctx              = lz4hc_alloc_ctx,
+       .free_ctx               = lz4hc_free_ctx,
+       .compress               = lz4hc_scompress,
+       .decompress             = lz4hc_sdecompress,
+       .base                   = {
+               .cra_name       = "lz4hc",
+               .cra_driver_name = "lz4hc-scomp",
+               .cra_module      = THIS_MODULE,
+       }
+};
+
 static int __init lz4hc_mod_init(void)
 {
-       return crypto_register_alg(&alg_lz4hc);
+       int ret;
+
+       ret = crypto_register_alg(&alg_lz4hc);
+       if (ret)
+               return ret;
+
+       ret = crypto_register_scomp(&scomp);
+       if (ret) {
+               crypto_unregister_alg(&alg_lz4hc);
+               return ret;
+       }
+
+       return ret;
 }
 
 static void __exit lz4hc_mod_fini(void)
 {
        crypto_unregister_alg(&alg_lz4hc);
+       crypto_unregister_scomp(&scomp);
 }
 
 module_init(lz4hc_mod_init);
index c3f3dd9..168df78 100644 (file)
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/lzo.h>
+#include <crypto/internal/scompress.h>
 
 struct lzo_ctx {
        void *lzo_comp_mem;
 };
 
+static void *lzo_alloc_ctx(struct crypto_scomp *tfm)
+{
+       void *ctx;
+
+       ctx = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
+       if (!ctx)
+               ctx = vmalloc(LZO1X_MEM_COMPRESS);
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       return ctx;
+}
+
 static int lzo_init(struct crypto_tfm *tfm)
 {
        struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       ctx->lzo_comp_mem = kmalloc(LZO1X_MEM_COMPRESS,
-                                   GFP_KERNEL | __GFP_NOWARN);
-       if (!ctx->lzo_comp_mem)
-               ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS);
-       if (!ctx->lzo_comp_mem)
+       ctx->lzo_comp_mem = lzo_alloc_ctx(NULL);
+       if (IS_ERR(ctx->lzo_comp_mem))
                return -ENOMEM;
 
        return 0;
 }
 
+static void lzo_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+       kvfree(ctx);
+}
+
 static void lzo_exit(struct crypto_tfm *tfm)
 {
        struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
 
-       kvfree(ctx->lzo_comp_mem);
+       lzo_free_ctx(NULL, ctx->lzo_comp_mem);
 }
 
-static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
-                           unsigned int slen, u8 *dst, unsigned int *dlen)
+static int __lzo_compress(const u8 *src, unsigned int slen,
+                         u8 *dst, unsigned int *dlen, void *ctx)
 {
-       struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
        size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
        int err;
 
-       err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx->lzo_comp_mem);
+       err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx);
 
        if (err != LZO_E_OK)
                return -EINVAL;
@@ -64,8 +79,23 @@ static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
        return 0;
 }
 
-static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
-                             unsigned int slen, u8 *dst, unsigned int *dlen)
+static int lzo_compress(struct crypto_tfm *tfm, const u8 *src,
+                       unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+       struct lzo_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       return __lzo_compress(src, slen, dst, dlen, ctx->lzo_comp_mem);
+}
+
+static int lzo_scompress(struct crypto_scomp *tfm, const u8 *src,
+                        unsigned int slen, u8 *dst, unsigned int *dlen,
+                        void *ctx)
+{
+       return __lzo_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __lzo_decompress(const u8 *src, unsigned int slen,
+                           u8 *dst, unsigned int *dlen)
 {
        int err;
        size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
@@ -77,7 +107,19 @@ static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
 
        *dlen = tmp_len;
        return 0;
+}
 
+static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src,
+                         unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+       return __lzo_decompress(src, slen, dst, dlen);
+}
+
+static int lzo_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+                          unsigned int slen, u8 *dst, unsigned int *dlen,
+                          void *ctx)
+{
+       return __lzo_decompress(src, slen, dst, dlen);
 }
 
 static struct crypto_alg alg = {
@@ -88,18 +130,43 @@ static struct crypto_alg alg = {
        .cra_init               = lzo_init,
        .cra_exit               = lzo_exit,
        .cra_u                  = { .compress = {
-       .coa_compress           = lzo_compress,
-       .coa_decompress         = lzo_decompress } }
+       .coa_compress           = lzo_compress,
+       .coa_decompress         = lzo_decompress } }
+};
+
+static struct scomp_alg scomp = {
+       .alloc_ctx              = lzo_alloc_ctx,
+       .free_ctx               = lzo_free_ctx,
+       .compress               = lzo_scompress,
+       .decompress             = lzo_sdecompress,
+       .base                   = {
+               .cra_name       = "lzo",
+               .cra_driver_name = "lzo-scomp",
+               .cra_module      = THIS_MODULE,
+       }
 };
 
 static int __init lzo_mod_init(void)
 {
-       return crypto_register_alg(&alg);
+       int ret;
+
+       ret = crypto_register_alg(&alg);
+       if (ret)
+               return ret;
+
+       ret = crypto_register_scomp(&scomp);
+       if (ret) {
+               crypto_unregister_alg(&alg);
+               return ret;
+       }
+
+       return ret;
 }
 
 static void __exit lzo_mod_fini(void)
 {
        crypto_unregister_alg(&alg);
+       crypto_unregister_scomp(&scomp);
 }
 
 module_init(lzo_mod_init);
index f654965..e4538e0 100644 (file)
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/scatterlist.h>
 #include <linux/slab.h>
 
 struct crypto_pcbc_ctx {
        struct crypto_cipher *child;
 };
 
-static int crypto_pcbc_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_pcbc_setkey(struct crypto_skcipher *parent, const u8 *key,
                              unsigned int keylen)
 {
-       struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(parent);
+       struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(parent);
        struct crypto_cipher *child = ctx->child;
        int err;
 
        crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-       crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-                               CRYPTO_TFM_REQ_MASK);
+       crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+                                      CRYPTO_TFM_REQ_MASK);
        err = crypto_cipher_setkey(child, key, keylen);
-       crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-                            CRYPTO_TFM_RES_MASK);
+       crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+                                         CRYPTO_TFM_RES_MASK);
        return err;
 }
 
-static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
-                                      struct blkcipher_walk *walk,
+static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
+                                      struct skcipher_walk *walk,
                                       struct crypto_cipher *tfm)
 {
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_encrypt;
        int bsize = crypto_cipher_blocksize(tfm);
        unsigned int nbytes = walk->nbytes;
        u8 *src = walk->src.virt.addr;
@@ -56,7 +53,7 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
 
        do {
                crypto_xor(iv, src, bsize);
-               fn(crypto_cipher_tfm(tfm), dst, iv);
+               crypto_cipher_encrypt_one(tfm, dst, iv);
                memcpy(iv, dst, bsize);
                crypto_xor(iv, src, bsize);
 
@@ -67,12 +64,10 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
        return nbytes;
 }
 
-static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
-                                      struct blkcipher_walk *walk,
+static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
+                                      struct skcipher_walk *walk,
                                       struct crypto_cipher *tfm)
 {
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_encrypt;
        int bsize = crypto_cipher_blocksize(tfm);
        unsigned int nbytes = walk->nbytes;
        u8 *src = walk->src.virt.addr;
@@ -82,7 +77,7 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
        do {
                memcpy(tmpbuf, src, bsize);
                crypto_xor(iv, src, bsize);
-               fn(crypto_cipher_tfm(tfm), src, iv);
+               crypto_cipher_encrypt_one(tfm, src, iv);
                memcpy(iv, tmpbuf, bsize);
                crypto_xor(iv, src, bsize);
 
@@ -94,38 +89,34 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
        return nbytes;
 }
 
-static int crypto_pcbc_encrypt(struct blkcipher_desc *desc,
-                              struct scatterlist *dst, struct scatterlist *src,
-                              unsigned int nbytes)
+static int crypto_pcbc_encrypt(struct skcipher_request *req)
 {
-       struct blkcipher_walk walk;
-       struct crypto_blkcipher *tfm = desc->tfm;
-       struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_cipher *child = ctx->child;
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, false);
 
        while ((nbytes = walk.nbytes)) {
                if (walk.src.virt.addr == walk.dst.virt.addr)
-                       nbytes = crypto_pcbc_encrypt_inplace(desc, &walk,
+                       nbytes = crypto_pcbc_encrypt_inplace(req, &walk,
                                                             child);
                else
-                       nbytes = crypto_pcbc_encrypt_segment(desc, &walk,
+                       nbytes = crypto_pcbc_encrypt_segment(req, &walk,
                                                             child);
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
 
        return err;
 }
 
-static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
-                                      struct blkcipher_walk *walk,
+static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
+                                      struct skcipher_walk *walk,
                                       struct crypto_cipher *tfm)
 {
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_decrypt;
        int bsize = crypto_cipher_blocksize(tfm);
        unsigned int nbytes = walk->nbytes;
        u8 *src = walk->src.virt.addr;
@@ -133,7 +124,7 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
        u8 *iv = walk->iv;
 
        do {
-               fn(crypto_cipher_tfm(tfm), dst, src);
+               crypto_cipher_decrypt_one(tfm, dst, src);
                crypto_xor(dst, iv, bsize);
                memcpy(iv, src, bsize);
                crypto_xor(iv, dst, bsize);
@@ -147,21 +138,19 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
        return nbytes;
 }
 
-static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc,
-                                      struct blkcipher_walk *walk,
+static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
+                                      struct skcipher_walk *walk,
                                       struct crypto_cipher *tfm)
 {
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-               crypto_cipher_alg(tfm)->cia_decrypt;
        int bsize = crypto_cipher_blocksize(tfm);
        unsigned int nbytes = walk->nbytes;
        u8 *src = walk->src.virt.addr;
        u8 *iv = walk->iv;
-       u8 tmpbuf[bsize];
+       u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32))));
 
        do {
                memcpy(tmpbuf, src, bsize);
-               fn(crypto_cipher_tfm(tfm), src, src);
+               crypto_cipher_decrypt_one(tfm, src, src);
                crypto_xor(src, iv, bsize);
                memcpy(iv, tmpbuf, bsize);
                crypto_xor(iv, src, bsize);
@@ -174,37 +163,35 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc,
        return nbytes;
 }
 
-static int crypto_pcbc_decrypt(struct blkcipher_desc *desc,
-                              struct scatterlist *dst, struct scatterlist *src,
-                              unsigned int nbytes)
+static int crypto_pcbc_decrypt(struct skcipher_request *req)
 {
-       struct blkcipher_walk walk;
-       struct crypto_blkcipher *tfm = desc->tfm;
-       struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_cipher *child = ctx->child;
+       struct skcipher_walk walk;
+       unsigned int nbytes;
        int err;
 
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt(desc, &walk);
+       err = skcipher_walk_virt(&walk, req, false);
 
        while ((nbytes = walk.nbytes)) {
                if (walk.src.virt.addr == walk.dst.virt.addr)
-                       nbytes = crypto_pcbc_decrypt_inplace(desc, &walk,
+                       nbytes = crypto_pcbc_decrypt_inplace(req, &walk,
                                                             child);
                else
-                       nbytes = crypto_pcbc_decrypt_segment(desc, &walk,
+                       nbytes = crypto_pcbc_decrypt_segment(req, &walk,
                                                             child);
-               err = blkcipher_walk_done(desc, &walk, nbytes);
+               err = skcipher_walk_done(&walk, nbytes);
        }
 
        return err;
 }
 
-static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
+static int crypto_pcbc_init_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-       struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+       struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
        struct crypto_cipher *cipher;
 
        cipher = crypto_spawn_cipher(spawn);
@@ -215,68 +202,98 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
        return 0;
 }
 
-static void crypto_pcbc_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_pcbc_exit_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
        crypto_free_cipher(ctx->child);
 }
 
-static struct crypto_instance *crypto_pcbc_alloc(struct rtattr **tb)
+static void crypto_pcbc_free(struct skcipher_instance *inst)
+{
+       crypto_drop_skcipher(skcipher_instance_ctx(inst));
+       kfree(inst);
+}
+
+static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_instance *inst;
+       struct skcipher_instance *inst;
+       struct crypto_attr_type *algt;
+       struct crypto_spawn *spawn;
        struct crypto_alg *alg;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
-       if (err)
-               return ERR_PTR(err);
+       algt = crypto_get_attr_type(tb);
+       if (IS_ERR(algt))
+               return PTR_ERR(algt);
+
+       if (((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) &
+           ~CRYPTO_ALG_INTERNAL)
+               return -EINVAL;
 
-       alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-                                 CRYPTO_ALG_TYPE_MASK);
+       inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+       if (!inst)
+               return -ENOMEM;
+
+       alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER |
+                                     (algt->type & CRYPTO_ALG_INTERNAL),
+                                 CRYPTO_ALG_TYPE_MASK |
+                                 (algt->mask & CRYPTO_ALG_INTERNAL));
+       err = PTR_ERR(alg);
        if (IS_ERR(alg))
-               return ERR_CAST(alg);
+               goto err_free_inst;
+
+       spawn = skcipher_instance_ctx(inst);
+       err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+                               CRYPTO_ALG_TYPE_MASK);
+       crypto_mod_put(alg);
+       if (err)
+               goto err_free_inst;
 
-       inst = crypto_alloc_instance("pcbc", alg);
-       if (IS_ERR(inst))
-               goto out_put_alg;
+       err = crypto_inst_setname(skcipher_crypto_instance(inst), "pcbc", alg);
+       if (err)
+               goto err_drop_spawn;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-       inst->alg.cra_priority = alg->cra_priority;
-       inst->alg.cra_blocksize = alg->cra_blocksize;
-       inst->alg.cra_alignmask = alg->cra_alignmask;
-       inst->alg.cra_type = &crypto_blkcipher_type;
+       inst->alg.base.cra_flags = alg->cra_flags & CRYPTO_ALG_INTERNAL;
+       inst->alg.base.cra_priority = alg->cra_priority;
+       inst->alg.base.cra_blocksize = alg->cra_blocksize;
+       inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
        /* We access the data as u32s when xoring. */
-       inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+       inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 
-       inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-       inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-       inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+       inst->alg.ivsize = alg->cra_blocksize;
+       inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+       inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 
-       inst->alg.cra_ctxsize = sizeof(struct crypto_pcbc_ctx);
+       inst->alg.base.cra_ctxsize = sizeof(struct crypto_pcbc_ctx);
 
-       inst->alg.cra_init = crypto_pcbc_init_tfm;
-       inst->alg.cra_exit = crypto_pcbc_exit_tfm;
+       inst->alg.init = crypto_pcbc_init_tfm;
+       inst->alg.exit = crypto_pcbc_exit_tfm;
 
-       inst->alg.cra_blkcipher.setkey = crypto_pcbc_setkey;
-       inst->alg.cra_blkcipher.encrypt = crypto_pcbc_encrypt;
-       inst->alg.cra_blkcipher.decrypt = crypto_pcbc_decrypt;
+       inst->alg.setkey = crypto_pcbc_setkey;
+       inst->alg.encrypt = crypto_pcbc_encrypt;
+       inst->alg.decrypt = crypto_pcbc_decrypt;
 
-out_put_alg:
-       crypto_mod_put(alg);
-       return inst;
-}
+       inst->free = crypto_pcbc_free;
 
-static void crypto_pcbc_free(struct crypto_instance *inst)
-{
-       crypto_drop_spawn(crypto_instance_ctx(inst));
+       err = skcipher_register_instance(tmpl, inst);
+       if (err)
+               goto err_drop_spawn;
+
+out:
+       return err;
+
+err_drop_spawn:
+       crypto_drop_spawn(spawn);
+err_free_inst:
        kfree(inst);
+       goto out;
 }
 
 static struct crypto_template crypto_pcbc_tmpl = {
        .name = "pcbc",
-       .alloc = crypto_pcbc_alloc,
-       .free = crypto_pcbc_free,
+       .create = crypto_pcbc_create,
        .module = THIS_MODULE,
 };
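
For reference, a minimal sketch of how a caller drives the converted "pcbc" template through the skcipher API. This is not part of the patch: the function and its name are hypothetical, error handling is trimmed, and the generic (synchronous) pcbc implementation is assumed, so no completion wait is shown.

#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int pcbc_aes_encrypt_once(const u8 *key, unsigned int keylen, u8 *iv,
                                 struct scatterlist *src,
                                 struct scatterlist *dst, unsigned int len)
{
        struct crypto_skcipher *tfm;
        struct skcipher_request *req;
        int err;

        tfm = crypto_alloc_skcipher("pcbc(aes)", 0, 0);
        if (IS_ERR(tfm))
                return PTR_ERR(tfm);

        err = crypto_skcipher_setkey(tfm, key, keylen);
        if (err)
                goto out_free_tfm;

        req = skcipher_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
                err = -ENOMEM;
                goto out_free_tfm;
        }

        /* Synchronous use only; an async backend would need a completion. */
        skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
        skcipher_request_set_crypt(req, src, dst, len, iv);
        err = crypto_skcipher_encrypt(req);

        skcipher_request_free(req);
out_free_tfm:
        crypto_free_skcipher(tfm);
        return err;
}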
 
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 2df9835..b1c2d57 100644
@@ -17,6 +17,7 @@
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <asm/unaligned.h>
 
 static inline u64 mlt(u64 a, u64 b)
 {
@@ -33,11 +34,6 @@ static inline u32 and(u32 v, u32 mask)
        return v & mask;
 }
 
-static inline u32 le32_to_cpuvp(const void *p)
-{
-       return le32_to_cpup(p);
-}
-
 int crypto_poly1305_init(struct shash_desc *desc)
 {
        struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -65,19 +61,19 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
        /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
-       dctx->r[0] = (le32_to_cpuvp(key +  0) >> 0) & 0x3ffffff;
-       dctx->r[1] = (le32_to_cpuvp(key +  3) >> 2) & 0x3ffff03;
-       dctx->r[2] = (le32_to_cpuvp(key +  6) >> 4) & 0x3ffc0ff;
-       dctx->r[3] = (le32_to_cpuvp(key +  9) >> 6) & 0x3f03fff;
-       dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
+       dctx->r[0] = (get_unaligned_le32(key +  0) >> 0) & 0x3ffffff;
+       dctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
+       dctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
+       dctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
+       dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
 }
 
 static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
-       dctx->s[0] = le32_to_cpuvp(key +  0);
-       dctx->s[1] = le32_to_cpuvp(key +  4);
-       dctx->s[2] = le32_to_cpuvp(key +  8);
-       dctx->s[3] = le32_to_cpuvp(key + 12);
+       dctx->s[0] = get_unaligned_le32(key +  0);
+       dctx->s[1] = get_unaligned_le32(key +  4);
+       dctx->s[2] = get_unaligned_le32(key +  8);
+       dctx->s[3] = get_unaligned_le32(key + 12);
 }
 
 unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
@@ -137,11 +133,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
        while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
 
                /* h += m[i] */
-               h0 += (le32_to_cpuvp(src +  0) >> 0) & 0x3ffffff;
-               h1 += (le32_to_cpuvp(src +  3) >> 2) & 0x3ffffff;
-               h2 += (le32_to_cpuvp(src +  6) >> 4) & 0x3ffffff;
-               h3 += (le32_to_cpuvp(src +  9) >> 6) & 0x3ffffff;
-               h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+               h0 += (get_unaligned_le32(src +  0) >> 0) & 0x3ffffff;
+               h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
+               h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
+               h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
+               h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
 
                /* h *= r */
                d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
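
The change above is purely about the load accessor: the 26-bit limbs are read from byte offsets 0, 3, 6, 9 and 12, so all but the first load are misaligned, and dereferencing a 32-bit pointer there can fault on architectures that do not handle misaligned loads. A before/after sketch in isolation (the helper names are hypothetical; the mask matches the h1 limb):

#include <asm/unaligned.h>
#include <linux/kernel.h>

/* Old: dereferences a possibly misaligned 32-bit pointer. */
static u32 limb1_old(const u8 *p)
{
        return (le32_to_cpup((const __le32 *)(p + 3)) >> 2) & 0x3ffffff;
}

/* New: get_unaligned_le32() is safe regardless of the alignment of p. */
static u32 limb1_new(const u8 *p)
{
        return (get_unaligned_le32(p + 3) >> 2) & 0x3ffffff;
}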
diff --git a/crypto/scompress.c b/crypto/scompress.c
new file mode 100644
index 0000000..35e396d
--- /dev/null
@@ -0,0 +1,356 @@
+/*
+ * Synchronous Compression operations
+ *
+ * Copyright 2015 LG Electronics Inc.
+ * Copyright (c) 2016, Intel Corporation
+ * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <crypto/algapi.h>
+#include <linux/cryptouser.h>
+#include <net/netlink.h>
+#include <linux/scatterlist.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/acompress.h>
+#include <crypto/internal/scompress.h>
+#include "internal.h"
+
+static const struct crypto_type crypto_scomp_type;
+static void * __percpu *scomp_src_scratches;
+static void * __percpu *scomp_dst_scratches;
+static int scomp_scratch_users;
+static DEFINE_MUTEX(scomp_lock);
+
+#ifdef CONFIG_NET
+static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+       struct crypto_report_comp rscomp;
+
+       strncpy(rscomp.type, "scomp", sizeof(rscomp.type));
+
+       if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
+                   sizeof(struct crypto_report_comp), &rscomp))
+               goto nla_put_failure;
+       return 0;
+
+nla_put_failure:
+       return -EMSGSIZE;
+}
+#else
+static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+       return -ENOSYS;
+}
+#endif
+
+static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
+       __attribute__ ((unused));
+
+static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg)
+{
+       seq_puts(m, "type         : scomp\n");
+}
+
+static int crypto_scomp_init_tfm(struct crypto_tfm *tfm)
+{
+       return 0;
+}
+
+static void crypto_scomp_free_scratches(void * __percpu *scratches)
+{
+       int i;
+
+       if (!scratches)
+               return;
+
+       for_each_possible_cpu(i)
+               vfree(*per_cpu_ptr(scratches, i));
+
+       free_percpu(scratches);
+}
+
+static void * __percpu *crypto_scomp_alloc_scratches(void)
+{
+       void * __percpu *scratches;
+       int i;
+
+       scratches = alloc_percpu(void *);
+       if (!scratches)
+               return NULL;
+
+       for_each_possible_cpu(i) {
+               void *scratch;
+
+               scratch = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i));
+               if (!scratch)
+                       goto error;
+               *per_cpu_ptr(scratches, i) = scratch;
+       }
+
+       return scratches;
+
+error:
+       crypto_scomp_free_scratches(scratches);
+       return NULL;
+}
+
+static void crypto_scomp_free_all_scratches(void)
+{
+       if (!--scomp_scratch_users) {
+               crypto_scomp_free_scratches(scomp_src_scratches);
+               crypto_scomp_free_scratches(scomp_dst_scratches);
+               scomp_src_scratches = NULL;
+               scomp_dst_scratches = NULL;
+       }
+}
+
+static int crypto_scomp_alloc_all_scratches(void)
+{
+       if (!scomp_scratch_users++) {
+               scomp_src_scratches = crypto_scomp_alloc_scratches();
+               if (!scomp_src_scratches)
+                       return -ENOMEM;
+               scomp_dst_scratches = crypto_scomp_alloc_scratches();
+               if (!scomp_dst_scratches)
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
+static void crypto_scomp_sg_free(struct scatterlist *sgl)
+{
+       int i, n;
+       struct page *page;
+
+       if (!sgl)
+               return;
+
+       n = sg_nents(sgl);
+       for_each_sg(sgl, sgl, n, i) {
+               page = sg_page(sgl);
+               if (page)
+                       __free_page(page);
+       }
+
+       kfree(sgl);
+}
+
+static struct scatterlist *crypto_scomp_sg_alloc(size_t size, gfp_t gfp)
+{
+       struct scatterlist *sgl;
+       struct page *page;
+       int i, n;
+
+       n = ((size - 1) >> PAGE_SHIFT) + 1;
+
+       sgl = kmalloc_array(n, sizeof(struct scatterlist), gfp);
+       if (!sgl)
+               return NULL;
+
+       sg_init_table(sgl, n);
+
+       for (i = 0; i < n; i++) {
+               page = alloc_page(gfp);
+               if (!page)
+                       goto err;
+               sg_set_page(sgl + i, page, PAGE_SIZE, 0);
+       }
+
+       return sgl;
+
+err:
+       sg_mark_end(sgl + i);
+       crypto_scomp_sg_free(sgl);
+       return NULL;
+}
+
+static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir)
+{
+       struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+       void **tfm_ctx = acomp_tfm_ctx(tfm);
+       struct crypto_scomp *scomp = *tfm_ctx;
+       void **ctx = acomp_request_ctx(req);
+       const int cpu = get_cpu();
+       u8 *scratch_src = *per_cpu_ptr(scomp_src_scratches, cpu);
+       u8 *scratch_dst = *per_cpu_ptr(scomp_dst_scratches, cpu);
+       int ret;
+
+       if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (req->dst && !req->dlen) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE)
+               req->dlen = SCOMP_SCRATCH_SIZE;
+
+       scatterwalk_map_and_copy(scratch_src, req->src, 0, req->slen, 0);
+       if (dir)
+               ret = crypto_scomp_compress(scomp, scratch_src, req->slen,
+                                           scratch_dst, &req->dlen, *ctx);
+       else
+               ret = crypto_scomp_decompress(scomp, scratch_src, req->slen,
+                                             scratch_dst, &req->dlen, *ctx);
+       if (!ret) {
+               if (!req->dst) {
+                       req->dst = crypto_scomp_sg_alloc(req->dlen,
+                                  req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                                  GFP_KERNEL : GFP_ATOMIC);
+                       if (!req->dst)
+                               goto out;
+               }
+               scatterwalk_map_and_copy(scratch_dst, req->dst, 0, req->dlen,
+                                        1);
+       }
+out:
+       put_cpu();
+       return ret;
+}
+
+static int scomp_acomp_compress(struct acomp_req *req)
+{
+       return scomp_acomp_comp_decomp(req, 1);
+}
+
+static int scomp_acomp_decompress(struct acomp_req *req)
+{
+       return scomp_acomp_comp_decomp(req, 0);
+}
+
+static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm)
+{
+       struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
+
+       crypto_free_scomp(*ctx);
+}
+
+int crypto_init_scomp_ops_async(struct crypto_tfm *tfm)
+{
+       struct crypto_alg *calg = tfm->__crt_alg;
+       struct crypto_acomp *crt = __crypto_acomp_tfm(tfm);
+       struct crypto_scomp **ctx = crypto_tfm_ctx(tfm);
+       struct crypto_scomp *scomp;
+
+       if (!crypto_mod_get(calg))
+               return -EAGAIN;
+
+       scomp = crypto_create_tfm(calg, &crypto_scomp_type);
+       if (IS_ERR(scomp)) {
+               crypto_mod_put(calg);
+               return PTR_ERR(scomp);
+       }
+
+       *ctx = scomp;
+       tfm->exit = crypto_exit_scomp_ops_async;
+
+       crt->compress = scomp_acomp_compress;
+       crt->decompress = scomp_acomp_decompress;
+       crt->dst_free = crypto_scomp_sg_free;
+       crt->reqsize = sizeof(void *);
+
+       return 0;
+}
+
+struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req)
+{
+       struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+       struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+       struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm);
+       struct crypto_scomp *scomp = *tfm_ctx;
+       void *ctx;
+
+       ctx = crypto_scomp_alloc_ctx(scomp);
+       if (IS_ERR(ctx)) {
+               kfree(req);
+               return NULL;
+       }
+
+       *req->__ctx = ctx;
+
+       return req;
+}
+
+void crypto_acomp_scomp_free_ctx(struct acomp_req *req)
+{
+       struct crypto_acomp *acomp = crypto_acomp_reqtfm(req);
+       struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
+       struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm);
+       struct crypto_scomp *scomp = *tfm_ctx;
+       void *ctx = *req->__ctx;
+
+       if (ctx)
+               crypto_scomp_free_ctx(scomp, ctx);
+}
+
+static const struct crypto_type crypto_scomp_type = {
+       .extsize = crypto_alg_extsize,
+       .init_tfm = crypto_scomp_init_tfm,
+#ifdef CONFIG_PROC_FS
+       .show = crypto_scomp_show,
+#endif
+       .report = crypto_scomp_report,
+       .maskclear = ~CRYPTO_ALG_TYPE_MASK,
+       .maskset = CRYPTO_ALG_TYPE_MASK,
+       .type = CRYPTO_ALG_TYPE_SCOMPRESS,
+       .tfmsize = offsetof(struct crypto_scomp, base),
+};
+
+int crypto_register_scomp(struct scomp_alg *alg)
+{
+       struct crypto_alg *base = &alg->base;
+       int ret = -ENOMEM;
+
+       mutex_lock(&scomp_lock);
+       if (crypto_scomp_alloc_all_scratches())
+               goto error;
+
+       base->cra_type = &crypto_scomp_type;
+       base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+       base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS;
+
+       ret = crypto_register_alg(base);
+       if (ret)
+               goto error;
+
+       mutex_unlock(&scomp_lock);
+       return ret;
+
+error:
+       crypto_scomp_free_all_scratches();
+       mutex_unlock(&scomp_lock);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_scomp);
+
+int crypto_unregister_scomp(struct scomp_alg *alg)
+{
+       int ret;
+
+       mutex_lock(&scomp_lock);
+       ret = crypto_unregister_alg(&alg->base);
+       crypto_scomp_free_all_scratches();
+       mutex_unlock(&scomp_lock);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_scomp);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Synchronous compression type");
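
A rough sketch of how a driver would plug into the new scomp type, assuming the callback signatures used by crypto_scomp_compress()/crypto_scomp_decompress() above. The "nullcomp" algorithm and its callbacks are hypothetical and simply copy data; a real driver would call into an actual compression library.

#include <crypto/internal/scompress.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/string.h>

static void *nullcomp_alloc_ctx(struct crypto_scomp *tfm)
{
        return NULL;    /* no per-request state needed */
}

static void nullcomp_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
}

static int nullcomp_xform(struct crypto_scomp *tfm, const u8 *src,
                          unsigned int slen, u8 *dst, unsigned int *dlen,
                          void *ctx)
{
        if (slen > *dlen)
                return -ENOSPC;
        memcpy(dst, src, slen);
        *dlen = slen;
        return 0;
}

static struct scomp_alg nullcomp_alg = {
        .alloc_ctx      = nullcomp_alloc_ctx,
        .free_ctx       = nullcomp_free_ctx,
        .compress       = nullcomp_xform,
        .decompress     = nullcomp_xform,
        .base           = {
                .cra_name        = "nullcomp",
                .cra_driver_name = "nullcomp-scomp",
                .cra_module      = THIS_MODULE,
        },
};

/* Registered/unregistered with crypto_register_scomp(&nullcomp_alg) and
 * crypto_unregister_scomp(&nullcomp_alg); the acomp layer can then reach
 * it as well, which is the point of the scratch-buffer glue above.
 */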
diff --git a/crypto/simd.c b/crypto/simd.c
new file mode 100644
index 0000000..8820337
--- /dev/null
@@ -0,0 +1,226 @@
+/*
+ * Shared crypto simd helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright (c) 2016 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <asm/simd.h>
+
+struct simd_skcipher_alg {
+       const char *ialg_name;
+       struct skcipher_alg alg;
+};
+
+struct simd_skcipher_ctx {
+       struct cryptd_skcipher *cryptd_tfm;
+};
+
+static int simd_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+                               unsigned int key_len)
+{
+       struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *child = &ctx->cryptd_tfm->base;
+       int err;
+
+       crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(tfm) &
+                                        CRYPTO_TFM_REQ_MASK);
+       err = crypto_skcipher_setkey(child, key, key_len);
+       crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(child) &
+                                      CRYPTO_TFM_RES_MASK);
+       return err;
+}
+
+static int simd_skcipher_encrypt(struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_request *subreq;
+       struct crypto_skcipher *child;
+
+       subreq = skcipher_request_ctx(req);
+       *subreq = *req;
+
+       if (!may_use_simd() ||
+           (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
+               child = &ctx->cryptd_tfm->base;
+       else
+               child = cryptd_skcipher_child(ctx->cryptd_tfm);
+
+       skcipher_request_set_tfm(subreq, child);
+
+       return crypto_skcipher_encrypt(subreq);
+}
+
+static int simd_skcipher_decrypt(struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct skcipher_request *subreq;
+       struct crypto_skcipher *child;
+
+       subreq = skcipher_request_ctx(req);
+       *subreq = *req;
+
+       if (!may_use_simd() ||
+           (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
+               child = &ctx->cryptd_tfm->base;
+       else
+               child = cryptd_skcipher_child(ctx->cryptd_tfm);
+
+       skcipher_request_set_tfm(subreq, child);
+
+       return crypto_skcipher_decrypt(subreq);
+}
+
+static void simd_skcipher_exit(struct crypto_skcipher *tfm)
+{
+       struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+       cryptd_free_skcipher(ctx->cryptd_tfm);
+}
+
+static int simd_skcipher_init(struct crypto_skcipher *tfm)
+{
+       struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+       struct cryptd_skcipher *cryptd_tfm;
+       struct simd_skcipher_alg *salg;
+       struct skcipher_alg *alg;
+       unsigned reqsize;
+
+       alg = crypto_skcipher_alg(tfm);
+       salg = container_of(alg, struct simd_skcipher_alg, alg);
+
+       cryptd_tfm = cryptd_alloc_skcipher(salg->ialg_name,
+                                          CRYPTO_ALG_INTERNAL,
+                                          CRYPTO_ALG_INTERNAL);
+       if (IS_ERR(cryptd_tfm))
+               return PTR_ERR(cryptd_tfm);
+
+       ctx->cryptd_tfm = cryptd_tfm;
+
+       reqsize = sizeof(struct skcipher_request);
+       reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base);
+
+       crypto_skcipher_set_reqsize(tfm, reqsize);
+
+       return 0;
+}
+
+struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
+                                                     const char *drvname,
+                                                     const char *basename)
+{
+       struct simd_skcipher_alg *salg;
+       struct crypto_skcipher *tfm;
+       struct skcipher_alg *ialg;
+       struct skcipher_alg *alg;
+       int err;
+
+       tfm = crypto_alloc_skcipher(basename, CRYPTO_ALG_INTERNAL,
+                                   CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
+       if (IS_ERR(tfm))
+               return ERR_CAST(tfm);
+
+       ialg = crypto_skcipher_alg(tfm);
+
+       salg = kzalloc(sizeof(*salg), GFP_KERNEL);
+       if (!salg) {
+               salg = ERR_PTR(-ENOMEM);
+               goto out_put_tfm;
+       }
+
+       salg->ialg_name = basename;
+       alg = &salg->alg;
+
+       err = -ENAMETOOLONG;
+       if (snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", algname) >=
+           CRYPTO_MAX_ALG_NAME)
+               goto out_free_salg;
+
+       if (snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+                    drvname) >= CRYPTO_MAX_ALG_NAME)
+               goto out_free_salg;
+
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+       alg->base.cra_priority = ialg->base.cra_priority;
+       alg->base.cra_blocksize = ialg->base.cra_blocksize;
+       alg->base.cra_alignmask = ialg->base.cra_alignmask;
+       alg->base.cra_module = ialg->base.cra_module;
+       alg->base.cra_ctxsize = sizeof(struct simd_skcipher_ctx);
+
+       alg->ivsize = ialg->ivsize;
+       alg->chunksize = ialg->chunksize;
+       alg->min_keysize = ialg->min_keysize;
+       alg->max_keysize = ialg->max_keysize;
+
+       alg->init = simd_skcipher_init;
+       alg->exit = simd_skcipher_exit;
+
+       alg->setkey = simd_skcipher_setkey;
+       alg->encrypt = simd_skcipher_encrypt;
+       alg->decrypt = simd_skcipher_decrypt;
+
+       err = crypto_register_skcipher(alg);
+       if (err)
+               goto out_free_salg;
+
+out_put_tfm:
+       crypto_free_skcipher(tfm);
+       return salg;
+
+out_free_salg:
+       kfree(salg);
+       salg = ERR_PTR(err);
+       goto out_put_tfm;
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_create_compat);
+
+struct simd_skcipher_alg *simd_skcipher_create(const char *algname,
+                                              const char *basename)
+{
+       char drvname[CRYPTO_MAX_ALG_NAME];
+
+       if (snprintf(drvname, CRYPTO_MAX_ALG_NAME, "simd-%s", basename) >=
+           CRYPTO_MAX_ALG_NAME)
+               return ERR_PTR(-ENAMETOOLONG);
+
+       return simd_skcipher_create_compat(algname, drvname, basename);
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_create);
+
+void simd_skcipher_free(struct simd_skcipher_alg *salg)
+{
+       crypto_unregister_skcipher(&salg->alg);
+       kfree(salg);
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_free);
+
+MODULE_LICENSE("GPL");
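
A short sketch of the intended use: an arch driver registers its raw implementation with CRYPTO_ALG_INTERNAL and then publishes a cryptd-backed wrapper via the helper. The "__cbc-aes-example" driver name and the module hooks below are hypothetical.

#include <crypto/internal/simd.h>
#include <linux/err.h>
#include <linux/module.h>

static struct simd_skcipher_alg *simd_alg;

static int __init example_simd_init(void)
{
        /* The internal skcipher "__cbc-aes-example" must already be
         * registered with CRYPTO_ALG_INTERNAL; the wrapper becomes the
         * user-visible "cbc(aes)" with driver name
         * "simd-__cbc-aes-example".
         */
        simd_alg = simd_skcipher_create("cbc(aes)", "__cbc-aes-example");
        return PTR_ERR_OR_ZERO(simd_alg);
}

static void __exit example_simd_exit(void)
{
        simd_skcipher_free(simd_alg);
}

module_init(example_simd_init);
module_exit(example_simd_exit);
MODULE_LICENSE("GPL");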
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index f7d0018..aca07c6 100644
  *
  */
 
+#include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/bug.h>
 #include <linux/cryptouser.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/rtnetlink.h>
 #include <linux/seq_file.h>
 
 #include "internal.h"
 
+enum {
+       SKCIPHER_WALK_PHYS = 1 << 0,
+       SKCIPHER_WALK_SLOW = 1 << 1,
+       SKCIPHER_WALK_COPY = 1 << 2,
+       SKCIPHER_WALK_DIFF = 1 << 3,
+       SKCIPHER_WALK_SLEEP = 1 << 4,
+};
+
+struct skcipher_walk_buffer {
+       struct list_head entry;
+       struct scatter_walk dst;
+       unsigned int len;
+       u8 *data;
+       u8 buffer[];
+};
+
+static int skcipher_walk_next(struct skcipher_walk *walk);
+
+static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr)
+{
+       if (PageHighMem(scatterwalk_page(walk)))
+               kunmap_atomic(vaddr);
+}
+
+static inline void *skcipher_map(struct scatter_walk *walk)
+{
+       struct page *page = scatterwalk_page(walk);
+
+       return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) +
+              offset_in_page(walk->offset);
+}
+
+static inline void skcipher_map_src(struct skcipher_walk *walk)
+{
+       walk->src.virt.addr = skcipher_map(&walk->in);
+}
+
+static inline void skcipher_map_dst(struct skcipher_walk *walk)
+{
+       walk->dst.virt.addr = skcipher_map(&walk->out);
+}
+
+static inline void skcipher_unmap_src(struct skcipher_walk *walk)
+{
+       skcipher_unmap(&walk->in, walk->src.virt.addr);
+}
+
+static inline void skcipher_unmap_dst(struct skcipher_walk *walk)
+{
+       skcipher_unmap(&walk->out, walk->dst.virt.addr);
+}
+
+static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)
+{
+       return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+}
+
+/* Get a spot of the specified length that does not straddle a page.
+ * The caller needs to ensure that there is enough space for this operation.
+ */
+static inline u8 *skcipher_get_spot(u8 *start, unsigned int len)
+{
+       u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
+
+       return max(start, end_page);
+}
+
+static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+       u8 *addr;
+
+       addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
+       addr = skcipher_get_spot(addr, bsize);
+       scatterwalk_copychunks(addr, &walk->out, bsize,
+                              (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1);
+       return 0;
+}
+
+int skcipher_walk_done(struct skcipher_walk *walk, int err)
+{
+       unsigned int n = walk->nbytes - err;
+       unsigned int nbytes;
+
+       nbytes = walk->total - n;
+
+       if (unlikely(err < 0)) {
+               nbytes = 0;
+               n = 0;
+       } else if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS |
+                                          SKCIPHER_WALK_SLOW |
+                                          SKCIPHER_WALK_COPY |
+                                          SKCIPHER_WALK_DIFF)))) {
+unmap_src:
+               skcipher_unmap_src(walk);
+       } else if (walk->flags & SKCIPHER_WALK_DIFF) {
+               skcipher_unmap_dst(walk);
+               goto unmap_src;
+       } else if (walk->flags & SKCIPHER_WALK_COPY) {
+               skcipher_map_dst(walk);
+               memcpy(walk->dst.virt.addr, walk->page, n);
+               skcipher_unmap_dst(walk);
+       } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) {
+               if (WARN_ON(err)) {
+                       err = -EINVAL;
+                       nbytes = 0;
+               } else
+                       n = skcipher_done_slow(walk, n);
+       }
+
+       if (err > 0)
+               err = 0;
+
+       walk->total = nbytes;
+       walk->nbytes = nbytes;
+
+       scatterwalk_advance(&walk->in, n);
+       scatterwalk_advance(&walk->out, n);
+       scatterwalk_done(&walk->in, 0, nbytes);
+       scatterwalk_done(&walk->out, 1, nbytes);
+
+       if (nbytes) {
+               crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ?
+                            CRYPTO_TFM_REQ_MAY_SLEEP : 0);
+               return skcipher_walk_next(walk);
+       }
+
+       /* Short-circuit for the common/fast path. */
+       if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
+               goto out;
+
+       if (walk->flags & SKCIPHER_WALK_PHYS)
+               goto out;
+
+       if (walk->iv != walk->oiv)
+               memcpy(walk->oiv, walk->iv, walk->ivsize);
+       if (walk->buffer != walk->page)
+               kfree(walk->buffer);
+       if (walk->page)
+               free_page((unsigned long)walk->page);
+
+out:
+       return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_done);
+
+void skcipher_walk_complete(struct skcipher_walk *walk, int err)
+{
+       struct skcipher_walk_buffer *p, *tmp;
+
+       list_for_each_entry_safe(p, tmp, &walk->buffers, entry) {
+               u8 *data;
+
+               if (err)
+                       goto done;
+
+               data = p->data;
+               if (!data) {
+                       data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1);
+                       data = skcipher_get_spot(data, walk->chunksize);
+               }
+
+               scatterwalk_copychunks(data, &p->dst, p->len, 1);
+
+               if (offset_in_page(p->data) + p->len + walk->chunksize >
+                   PAGE_SIZE)
+                       free_page((unsigned long)p->data);
+
+done:
+               list_del(&p->entry);
+               kfree(p);
+       }
+
+       if (!err && walk->iv != walk->oiv)
+               memcpy(walk->oiv, walk->iv, walk->ivsize);
+       if (walk->buffer != walk->page)
+               kfree(walk->buffer);
+       if (walk->page)
+               free_page((unsigned long)walk->page);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_complete);
+
+static void skcipher_queue_write(struct skcipher_walk *walk,
+                                struct skcipher_walk_buffer *p)
+{
+       p->dst = walk->out;
+       list_add_tail(&p->entry, &walk->buffers);
+}
+
+static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+       bool phys = walk->flags & SKCIPHER_WALK_PHYS;
+       unsigned alignmask = walk->alignmask;
+       struct skcipher_walk_buffer *p;
+       unsigned a;
+       unsigned n;
+       u8 *buffer;
+       void *v;
+
+       if (!phys) {
+               buffer = walk->buffer ?: walk->page;
+               if (buffer)
+                       goto ok;
+       }
+
+       /* Start with the minimum alignment of kmalloc. */
+       a = crypto_tfm_ctx_alignment() - 1;
+       n = bsize;
+
+       if (phys) {
+               /* Calculate the minimum alignment of p->buffer. */
+               a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1;
+               n += sizeof(*p);
+       }
+
+       /* Minimum size to align p->buffer by alignmask. */
+       n += alignmask & ~a;
+
+       /* Minimum size to ensure p->buffer does not straddle a page. */
+       n += (bsize - 1) & ~(alignmask | a);
+
+       v = kzalloc(n, skcipher_walk_gfp(walk));
+       if (!v)
+               return skcipher_walk_done(walk, -ENOMEM);
+
+       if (phys) {
+               p = v;
+               p->len = bsize;
+               skcipher_queue_write(walk, p);
+               buffer = p->buffer;
+       } else {
+               walk->buffer = v;
+               buffer = v;
+       }
+
+ok:
+       walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1);
+       walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize);
+       walk->src.virt.addr = walk->dst.virt.addr;
+
+       scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0);
+
+       walk->nbytes = bsize;
+       walk->flags |= SKCIPHER_WALK_SLOW;
+
+       return 0;
+}
+
+static int skcipher_next_copy(struct skcipher_walk *walk)
+{
+       struct skcipher_walk_buffer *p;
+       u8 *tmp = walk->page;
+
+       skcipher_map_src(walk);
+       memcpy(tmp, walk->src.virt.addr, walk->nbytes);
+       skcipher_unmap_src(walk);
+
+       walk->src.virt.addr = tmp;
+       walk->dst.virt.addr = tmp;
+
+       if (!(walk->flags & SKCIPHER_WALK_PHYS))
+               return 0;
+
+       p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk));
+       if (!p)
+               return -ENOMEM;
+
+       p->data = walk->page;
+       p->len = walk->nbytes;
+       skcipher_queue_write(walk, p);
+
+       if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize >
+           PAGE_SIZE)
+               walk->page = NULL;
+       else
+               walk->page += walk->nbytes;
+
+       return 0;
+}
+
+static int skcipher_next_fast(struct skcipher_walk *walk)
+{
+       unsigned long diff;
+
+       walk->src.phys.page = scatterwalk_page(&walk->in);
+       walk->src.phys.offset = offset_in_page(walk->in.offset);
+       walk->dst.phys.page = scatterwalk_page(&walk->out);
+       walk->dst.phys.offset = offset_in_page(walk->out.offset);
+
+       if (walk->flags & SKCIPHER_WALK_PHYS)
+               return 0;
+
+       diff = walk->src.phys.offset - walk->dst.phys.offset;
+       diff |= walk->src.virt.page - walk->dst.virt.page;
+
+       skcipher_map_src(walk);
+       walk->dst.virt.addr = walk->src.virt.addr;
+
+       if (diff) {
+               walk->flags |= SKCIPHER_WALK_DIFF;
+               skcipher_map_dst(walk);
+       }
+
+       return 0;
+}
+
+static int skcipher_walk_next(struct skcipher_walk *walk)
+{
+       unsigned int bsize;
+       unsigned int n;
+       int err;
+
+       walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY |
+                        SKCIPHER_WALK_DIFF);
+
+       n = walk->total;
+       bsize = min(walk->chunksize, max(n, walk->blocksize));
+       n = scatterwalk_clamp(&walk->in, n);
+       n = scatterwalk_clamp(&walk->out, n);
+
+       if (unlikely(n < bsize)) {
+               if (unlikely(walk->total < walk->blocksize))
+                       return skcipher_walk_done(walk, -EINVAL);
+
+slow_path:
+               err = skcipher_next_slow(walk, bsize);
+               goto set_phys_lowmem;
+       }
+
+       if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) {
+               if (!walk->page) {
+                       gfp_t gfp = skcipher_walk_gfp(walk);
+
+                       walk->page = (void *)__get_free_page(gfp);
+                       if (!walk->page)
+                               goto slow_path;
+               }
+
+               walk->nbytes = min_t(unsigned, n,
+                                    PAGE_SIZE - offset_in_page(walk->page));
+               walk->flags |= SKCIPHER_WALK_COPY;
+               err = skcipher_next_copy(walk);
+               goto set_phys_lowmem;
+       }
+
+       walk->nbytes = n;
+
+       return skcipher_next_fast(walk);
+
+set_phys_lowmem:
+       if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) {
+               walk->src.phys.page = virt_to_page(walk->src.virt.addr);
+               walk->dst.phys.page = virt_to_page(walk->dst.virt.addr);
+               walk->src.phys.offset &= PAGE_SIZE - 1;
+               walk->dst.phys.offset &= PAGE_SIZE - 1;
+       }
+       return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_next);
+
+static int skcipher_copy_iv(struct skcipher_walk *walk)
+{
+       unsigned a = crypto_tfm_ctx_alignment() - 1;
+       unsigned alignmask = walk->alignmask;
+       unsigned ivsize = walk->ivsize;
+       unsigned bs = walk->chunksize;
+       unsigned aligned_bs;
+       unsigned size;
+       u8 *iv;
+
+       aligned_bs = ALIGN(bs, alignmask);
+
+       /* Minimum size to align buffer by alignmask. */
+       size = alignmask & ~a;
+
+       if (walk->flags & SKCIPHER_WALK_PHYS)
+               size += ivsize;
+       else {
+               size += aligned_bs + ivsize;
+
+               /* Minimum size to ensure buffer does not straddle a page. */
+               size += (bs - 1) & ~(alignmask | a);
+       }
+
+       walk->buffer = kmalloc(size, skcipher_walk_gfp(walk));
+       if (!walk->buffer)
+               return -ENOMEM;
+
+       iv = PTR_ALIGN(walk->buffer, alignmask + 1);
+       iv = skcipher_get_spot(iv, bs) + aligned_bs;
+
+       walk->iv = memcpy(iv, walk->iv, walk->ivsize);
+       return 0;
+}
+
+static int skcipher_walk_first(struct skcipher_walk *walk)
+{
+       walk->nbytes = 0;
+
+       if (WARN_ON_ONCE(in_irq()))
+               return -EDEADLK;
+
+       if (unlikely(!walk->total))
+               return 0;
+
+       walk->buffer = NULL;
+       if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+               int err = skcipher_copy_iv(walk);
+               if (err)
+                       return err;
+       }
+
+       walk->page = NULL;
+       walk->nbytes = walk->total;
+
+       return skcipher_walk_next(walk);
+}
+
+static int skcipher_walk_skcipher(struct skcipher_walk *walk,
+                                 struct skcipher_request *req)
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+       scatterwalk_start(&walk->in, req->src);
+       scatterwalk_start(&walk->out, req->dst);
+
+       walk->total = req->cryptlen;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
+
+       walk->flags &= ~SKCIPHER_WALK_SLEEP;
+       walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+                      SKCIPHER_WALK_SLEEP : 0;
+
+       walk->blocksize = crypto_skcipher_blocksize(tfm);
+       walk->chunksize = crypto_skcipher_chunksize(tfm);
+       walk->ivsize = crypto_skcipher_ivsize(tfm);
+       walk->alignmask = crypto_skcipher_alignmask(tfm);
+
+       return skcipher_walk_first(walk);
+}
+
+int skcipher_walk_virt(struct skcipher_walk *walk,
+                      struct skcipher_request *req, bool atomic)
+{
+       int err;
+
+       walk->flags &= ~SKCIPHER_WALK_PHYS;
+
+       err = skcipher_walk_skcipher(walk, req);
+
+       walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0;
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_virt);
+
+void skcipher_walk_atomise(struct skcipher_walk *walk)
+{
+       walk->flags &= ~SKCIPHER_WALK_SLEEP;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_atomise);
+
+int skcipher_walk_async(struct skcipher_walk *walk,
+                       struct skcipher_request *req)
+{
+       walk->flags |= SKCIPHER_WALK_PHYS;
+
+       INIT_LIST_HEAD(&walk->buffers);
+
+       return skcipher_walk_skcipher(walk, req);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_async);
+
+static int skcipher_walk_aead_common(struct skcipher_walk *walk,
+                                    struct aead_request *req, bool atomic)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       int err;
+
+       walk->flags &= ~SKCIPHER_WALK_PHYS;
+
+       scatterwalk_start(&walk->in, req->src);
+       scatterwalk_start(&walk->out, req->dst);
+
+       scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2);
+       scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2);
+
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
+
+       if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+               walk->flags |= SKCIPHER_WALK_SLEEP;
+       else
+               walk->flags &= ~SKCIPHER_WALK_SLEEP;
+
+       walk->blocksize = crypto_aead_blocksize(tfm);
+       walk->chunksize = crypto_aead_chunksize(tfm);
+       walk->ivsize = crypto_aead_ivsize(tfm);
+       walk->alignmask = crypto_aead_alignmask(tfm);
+
+       err = skcipher_walk_first(walk);
+
+       if (atomic)
+               walk->flags &= ~SKCIPHER_WALK_SLEEP;
+
+       return err;
+}
+
+int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req,
+                      bool atomic)
+{
+       walk->total = req->cryptlen;
+
+       return skcipher_walk_aead_common(walk, req, atomic);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead);
+
+int skcipher_walk_aead_encrypt(struct skcipher_walk *walk,
+                              struct aead_request *req, bool atomic)
+{
+       walk->total = req->cryptlen;
+
+       return skcipher_walk_aead_common(walk, req, atomic);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead_encrypt);
+
+int skcipher_walk_aead_decrypt(struct skcipher_walk *walk,
+                              struct aead_request *req, bool atomic)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+       walk->total = req->cryptlen - crypto_aead_authsize(tfm);
+
+       return skcipher_walk_aead_common(walk, req, atomic);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead_decrypt);
+
 static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg)
 {
        if (alg->cra_type == &crypto_blkcipher_type)
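
For reference, the consumer pattern for the new walk interface is the loop already visible in the pcbc conversion above; a stripped-down sketch follows (the function name is hypothetical and the per-block transform is omitted, so this only copies).

#include <crypto/internal/skcipher.h>
#include <linux/string.h>

static int example_encrypt(struct skcipher_request *req)
{
        struct skcipher_walk walk;
        unsigned int nbytes;
        int err;

        err = skcipher_walk_virt(&walk, req, false);

        while ((nbytes = walk.nbytes) != 0) {
                /* Transform walk.src.virt.addr into walk.dst.virt.addr
                 * here; passing 0 to skcipher_walk_done() tells the
                 * walker that every byte it handed out was consumed.
                 */
                if (walk.src.virt.addr != walk.dst.virt.addr)
                        memcpy(walk.dst.virt.addr, walk.src.virt.addr,
                               nbytes);
                err = skcipher_walk_done(&walk, 0);
        }

        return err;
}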
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 62dffa0..f616ad7 100644
@@ -33,6 +33,7 @@
 #include <crypto/drbg.h>
 #include <crypto/akcipher.h>
 #include <crypto/kpp.h>
+#include <crypto/acompress.h>
 
 #include "internal.h"
 
@@ -62,7 +63,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
  */
 #define IDX1           32
 #define IDX2           32400
-#define IDX3           1
+#define IDX3           1511
 #define IDX4           8193
 #define IDX5           22222
 #define IDX6           17101
@@ -1442,6 +1443,126 @@ out:
        return ret;
 }
 
+static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate,
+                     struct comp_testvec *dtemplate, int ctcount, int dtcount)
+{
+       const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm));
+       unsigned int i;
+       char *output;
+       int ret;
+       struct scatterlist src, dst;
+       struct acomp_req *req;
+       struct tcrypt_result result;
+
+       output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL);
+       if (!output)
+               return -ENOMEM;
+
+       for (i = 0; i < ctcount; i++) {
+               unsigned int dlen = COMP_BUF_SIZE;
+               int ilen = ctemplate[i].inlen;
+
+               memset(output, 0, dlen);
+               init_completion(&result.completion);
+               sg_init_one(&src, ctemplate[i].input, ilen);
+               sg_init_one(&dst, output, dlen);
+
+               req = acomp_request_alloc(tfm);
+               if (!req) {
+                       pr_err("alg: acomp: request alloc failed for %s\n",
+                              algo);
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               acomp_request_set_params(req, &src, &dst, ilen, dlen);
+               acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                          tcrypt_complete, &result);
+
+               ret = wait_async_op(&result, crypto_acomp_compress(req));
+               if (ret) {
+                       pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n",
+                              i + 1, algo, -ret);
+                       acomp_request_free(req);
+                       goto out;
+               }
+
+               if (req->dlen != ctemplate[i].outlen) {
+                       pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n",
+                              i + 1, algo, req->dlen);
+                       ret = -EINVAL;
+                       acomp_request_free(req);
+                       goto out;
+               }
+
+               if (memcmp(output, ctemplate[i].output, req->dlen)) {
+                       pr_err("alg: acomp: Compression test %d failed for %s\n",
+                              i + 1, algo);
+                       hexdump(output, req->dlen);
+                       ret = -EINVAL;
+                       acomp_request_free(req);
+                       goto out;
+               }
+
+               acomp_request_free(req);
+       }
+
+       for (i = 0; i < dtcount; i++) {
+               unsigned int dlen = COMP_BUF_SIZE;
+               int ilen = dtemplate[i].inlen;
+
+               memset(output, 0, dlen);
+               init_completion(&result.completion);
+               sg_init_one(&src, dtemplate[i].input, ilen);
+               sg_init_one(&dst, output, dlen);
+
+               req = acomp_request_alloc(tfm);
+               if (!req) {
+                       pr_err("alg: acomp: request alloc failed for %s\n",
+                              algo);
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               acomp_request_set_params(req, &src, &dst, ilen, dlen);
+               acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                          tcrypt_complete, &result);
+
+               ret = wait_async_op(&result, crypto_acomp_decompress(req));
+               if (ret) {
+                       pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n",
+                              i + 1, algo, -ret);
+                       acomp_request_free(req);
+                       goto out;
+               }
+
+               if (req->dlen != dtemplate[i].outlen) {
+                       pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n",
+                              i + 1, algo, req->dlen);
+                       ret = -EINVAL;
+                       acomp_request_free(req);
+                       goto out;
+               }
+
+               if (memcmp(output, dtemplate[i].output, req->dlen)) {
+                       pr_err("alg: acomp: Decompression test %d failed for %s\n",
+                              i + 1, algo);
+                       hexdump(output, req->dlen);
+                       ret = -EINVAL;
+                       acomp_request_free(req);
+                       goto out;
+               }
+
+               acomp_request_free(req);
+       }
+
+       ret = 0;
+
+out:
+       kfree(output);
+       return ret;
+}
+
 static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
                      unsigned int tcount)
 {
@@ -1509,7 +1630,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
        struct crypto_aead *tfm;
        int err = 0;
 
-       tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_aead(driver, type, mask);
        if (IS_ERR(tfm)) {
                printk(KERN_ERR "alg: aead: Failed to load transform for %s: "
                       "%ld\n", driver, PTR_ERR(tfm));
@@ -1538,7 +1659,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc,
        struct crypto_cipher *tfm;
        int err = 0;
 
-       tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_cipher(driver, type, mask);
        if (IS_ERR(tfm)) {
                printk(KERN_ERR "alg: cipher: Failed to load transform for "
                       "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1567,7 +1688,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
        struct crypto_skcipher *tfm;
        int err = 0;
 
-       tfm = crypto_alloc_skcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_skcipher(driver, type, mask);
        if (IS_ERR(tfm)) {
                printk(KERN_ERR "alg: skcipher: Failed to load transform for "
                       "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1593,22 +1714,38 @@ out:
 static int alg_test_comp(const struct alg_test_desc *desc, const char *driver,
                         u32 type, u32 mask)
 {
-       struct crypto_comp *tfm;
+       struct crypto_comp *comp;
+       struct crypto_acomp *acomp;
        int err;
+       u32 algo_type = type & CRYPTO_ALG_TYPE_ACOMPRESS_MASK;
+
+       if (algo_type == CRYPTO_ALG_TYPE_ACOMPRESS) {
+               acomp = crypto_alloc_acomp(driver, type, mask);
+               if (IS_ERR(acomp)) {
+                       pr_err("alg: acomp: Failed to load transform for %s: %ld\n",
+                              driver, PTR_ERR(acomp));
+                       return PTR_ERR(acomp);
+               }
+               err = test_acomp(acomp, desc->suite.comp.comp.vecs,
+                                desc->suite.comp.decomp.vecs,
+                                desc->suite.comp.comp.count,
+                                desc->suite.comp.decomp.count);
+               crypto_free_acomp(acomp);
+       } else {
+               comp = crypto_alloc_comp(driver, type, mask);
+               if (IS_ERR(comp)) {
+                       pr_err("alg: comp: Failed to load transform for %s: %ld\n",
+                              driver, PTR_ERR(comp));
+                       return PTR_ERR(comp);
+               }
 
-       tfm = crypto_alloc_comp(driver, type, mask);
-       if (IS_ERR(tfm)) {
-               printk(KERN_ERR "alg: comp: Failed to load transform for %s: "
-                      "%ld\n", driver, PTR_ERR(tfm));
-               return PTR_ERR(tfm);
-       }
-
-       err = test_comp(tfm, desc->suite.comp.comp.vecs,
-                       desc->suite.comp.decomp.vecs,
-                       desc->suite.comp.comp.count,
-                       desc->suite.comp.decomp.count);
+               err = test_comp(comp, desc->suite.comp.comp.vecs,
+                               desc->suite.comp.decomp.vecs,
+                               desc->suite.comp.comp.count,
+                               desc->suite.comp.decomp.count);
 
-       crypto_free_comp(tfm);
+               crypto_free_comp(comp);
+       }
        return err;
 }
 
@@ -1618,7 +1755,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
        struct crypto_ahash *tfm;
        int err;
 
-       tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_ahash(driver, type, mask);
        if (IS_ERR(tfm)) {
                printk(KERN_ERR "alg: hash: Failed to load transform for %s: "
                       "%ld\n", driver, PTR_ERR(tfm));
@@ -1646,7 +1783,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc,
        if (err)
                goto out;
 
-       tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_shash(driver, type, mask);
        if (IS_ERR(tfm)) {
                printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: "
                       "%ld\n", driver, PTR_ERR(tfm));
@@ -1688,7 +1825,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
        struct crypto_rng *rng;
        int err;
 
-       rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       rng = crypto_alloc_rng(driver, type, mask);
        if (IS_ERR(rng)) {
                printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
                       "%ld\n", driver, PTR_ERR(rng));
@@ -1715,7 +1852,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
        if (!buf)
                return -ENOMEM;
 
-       drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       drng = crypto_alloc_rng(driver, type, mask);
        if (IS_ERR(drng)) {
                printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for "
                       "%s\n", driver);
@@ -1909,7 +2046,7 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver,
        struct crypto_kpp *tfm;
        int err = 0;
 
-       tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_kpp(driver, type, mask);
        if (IS_ERR(tfm)) {
                pr_err("alg: kpp: Failed to load tfm for %s: %ld\n",
                       driver, PTR_ERR(tfm));
@@ -2068,7 +2205,7 @@ static int alg_test_akcipher(const struct alg_test_desc *desc,
        struct crypto_akcipher *tfm;
        int err = 0;
 
-       tfm = crypto_alloc_akcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+       tfm = crypto_alloc_akcipher(driver, type, mask);
        if (IS_ERR(tfm)) {
                pr_err("alg: akcipher: Failed to load tfm for %s: %ld\n",
                       driver, PTR_ERR(tfm));
@@ -2091,88 +2228,6 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
        {
-               .alg = "__cbc-cast5-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__cbc-cast6-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__cbc-serpent-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__cbc-serpent-avx2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__cbc-serpent-sse2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__cbc-twofish-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-aes-aesni",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "__driver-cbc-camellia-aesni",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-camellia-aesni-avx2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-cast5-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-cast6-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-serpent-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-serpent-avx2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-serpent-sse2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-cbc-twofish-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-aes-aesni",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "__driver-ecb-camellia-aesni",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-camellia-aesni-avx2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-cast5-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-cast6-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-serpent-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-serpent-avx2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-serpent-sse2",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-ecb-twofish-avx",
-               .test = alg_test_null,
-       }, {
-               .alg = "__driver-gcm-aes-aesni",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "__ghash-pclmulqdqni",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
                .alg = "ansi_cprng",
                .test = alg_test_cprng,
                .suite = {
@@ -2659,55 +2714,6 @@ static const struct alg_test_desc alg_test_descs[] = {
                        }
                }
        }, {
-               .alg = "cryptd(__driver-cbc-aes-aesni)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "cryptd(__driver-cbc-camellia-aesni)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-cbc-camellia-aesni-avx2)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-cbc-serpent-avx2)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-aes-aesni)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "cryptd(__driver-ecb-camellia-aesni)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-camellia-aesni-avx2)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-cast5-avx)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-cast6-avx)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-serpent-avx)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-serpent-avx2)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-serpent-sse2)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-ecb-twofish-avx)",
-               .test = alg_test_null,
-       }, {
-               .alg = "cryptd(__driver-gcm-aes-aesni)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
-               .alg = "cryptd(__ghash-pclmulqdqni)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
                .alg = "ctr(aes)",
                .test = alg_test_skcipher,
                .fips_allowed = 1,
@@ -3034,10 +3040,6 @@ static const struct alg_test_desc alg_test_descs[] = {
                .fips_allowed = 1,
                .test = alg_test_null,
        }, {
-               .alg = "ecb(__aes-aesni)",
-               .test = alg_test_null,
-               .fips_allowed = 1,
-       }, {
                .alg = "ecb(aes)",
                .test = alg_test_skcipher,
                .fips_allowed = 1,
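
The hunks above drop CRYPTO_ALG_INTERNAL from the RNG, DRBG, KPP and akcipher allocations in testmgr, so the transforms are requested exactly as an ordinary kernel caller would request them, and the null-test entries for the old __driver/cryptd helper names are dropped alongside it. For orientation, the same crypto_alloc_rng() path from a kernel caller looks roughly like this; a minimal sketch, assuming the generic "stdrng" name and self-seeding, not code from this series:

#include <crypto/rng.h>
#include <linux/err.h>

/* Fetch len random bytes from the default registered RNG ("stdrng"). */
static int example_get_random(u8 *out, unsigned int len)
{
	struct crypto_rng *rng;
	int err;

	/* No CRYPTO_ALG_INTERNAL: internal-only implementations stay hidden. */
	rng = crypto_alloc_rng("stdrng", 0, 0);
	if (IS_ERR(rng))
		return PTR_ERR(rng);

	/* A NULL seed lets crypto_rng_reset() pull fresh seed material itself. */
	err = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
	if (!err)
		err = crypto_rng_get_bytes(rng, out, len);

	crypto_free_rng(rng);
	return err;
}
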
index e64a4ef..9b656be 100644 (file)
@@ -1334,36 +1334,50 @@ static struct hash_testvec rmd320_tv_template[] = {
        }
 };
 
-#define CRCT10DIF_TEST_VECTORS 3
+#define CRCT10DIF_TEST_VECTORS ARRAY_SIZE(crct10dif_tv_template)
 static struct hash_testvec crct10dif_tv_template[] = {
        {
-               .plaintext = "abc",
-               .psize  = 3,
-#ifdef __LITTLE_ENDIAN
-               .digest = "\x3b\x44",
-#else
-               .digest = "\x44\x3b",
-#endif
-       }, {
-               .plaintext = "1234567890123456789012345678901234567890"
-                            "123456789012345678901234567890123456789",
-               .psize  = 79,
-#ifdef __LITTLE_ENDIAN
-               .digest = "\x70\x4b",
-#else
-               .digest = "\x4b\x70",
-#endif
-       }, {
-               .plaintext =
-               "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-               .psize  = 56,
-#ifdef __LITTLE_ENDIAN
-               .digest = "\xe3\x9c",
-#else
-               .digest = "\x9c\xe3",
-#endif
-               .np     = 2,
-               .tap    = { 28, 28 }
+               .plaintext      = "abc",
+               .psize          = 3,
+               .digest         = (u8 *)(u16 []){ 0x443b },
+       }, {
+               .plaintext      = "1234567890123456789012345678901234567890"
+                                 "123456789012345678901234567890123456789",
+               .psize          = 79,
+               .digest         = (u8 *)(u16 []){ 0x4b70 },
+               .np             = 2,
+               .tap            = { 63, 16 },
+       }, {
+               .plaintext      = "abcdddddddddddddddddddddddddddddddddddddddd"
+                                 "ddddddddddddd",
+               .psize          = 56,
+               .digest         = (u8 *)(u16 []){ 0x9ce3 },
+               .np             = 8,
+               .tap            = { 1, 2, 28, 7, 6, 5, 4, 3 },
+       }, {
+               .plaintext      = "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "123456789012345678901234567890123456789",
+               .psize          = 319,
+               .digest         = (u8 *)(u16 []){ 0x44c6 },
+       }, {
+               .plaintext      = "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "1234567890123456789012345678901234567890"
+                                 "123456789012345678901234567890123456789",
+               .psize          = 319,
+               .digest         = (u8 *)(u16 []){ 0x44c6 },
+               .np             = 4,
+               .tap            = { 1, 255, 57, 6 },
        }
 };
 
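The replacement vectors above cover chunked updates (the np/tap splits) and much longer inputs (psize 319). CRC-T10DIF itself is the 16-bit CRC with generator polynomial 0x8bb7, zero initial value, no reflection and no final XOR; a bit-at-a-time reference (purely illustrative, not the code under test) reproduces digests such as 0x443b for "abc":

#include <stddef.h>
#include <stdint.h>

/*
 * Bit-at-a-time CRC-T10DIF: polynomial 0x8bb7, initial value 0,
 * no reflection, no final XOR.  Reference only, not the tested code.
 */
static uint16_t crc_t10dif_ref(const uint8_t *buf, size_t len)
{
	uint16_t crc = 0;

	for (size_t i = 0; i < len; i++) {
		crc ^= (uint16_t)buf[i] << 8;
		for (int bit = 0; bit < 8; bit++)
			crc = (crc & 0x8000) ? (uint16_t)(crc << 1) ^ 0x8bb7
					     : (uint16_t)(crc << 1);
	}
	return crc;	/* crc_t10dif_ref("abc", 3) == 0x443b */
}

Because a CRC is a function of the concatenated input only, every tap split of the same plaintext must produce the same digest, which is exactly what the chunked entries assert.
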
index 305343f..410a2e2 100644 (file)
@@ -13,7 +13,8 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  */
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 
+#define XTS_BUFFER_SIZE 128u
+
 struct priv {
-       struct crypto_cipher *child;
+       struct crypto_skcipher *child;
        struct crypto_cipher *tweak;
 };
 
-static int setkey(struct crypto_tfm *parent, const u8 *key,
+struct xts_instance_ctx {
+       struct crypto_skcipher_spawn spawn;
+       char name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct rctx {
+       be128 buf[XTS_BUFFER_SIZE / sizeof(be128)];
+
+       be128 t;
+
+       be128 *ext;
+
+       struct scatterlist srcbuf[2];
+       struct scatterlist dstbuf[2];
+       struct scatterlist *src;
+       struct scatterlist *dst;
+
+       unsigned int left;
+
+       struct skcipher_request subreq;
+};
+
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
                  unsigned int keylen)
 {
-       struct priv *ctx = crypto_tfm_ctx(parent);
-       struct crypto_cipher *child = ctx->tweak;
+       struct priv *ctx = crypto_skcipher_ctx(parent);
+       struct crypto_skcipher *child;
+       struct crypto_cipher *tweak;
        int err;
 
-       err = xts_check_key(parent, key, keylen);
+       err = xts_verify_key(parent, key, keylen);
        if (err)
                return err;
 
+       keylen /= 2;
+
        /* we need two cipher instances: one to compute the initial 'tweak'
         * by encrypting the IV (usually the 'plain' iv) and the other
         * one to encrypt and decrypt the data */
 
        /* tweak cipher, uses Key2 i.e. the second half of *key */
-       crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-       crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+       tweak = ctx->tweak;
+       crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK);
+       crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) &
                                       CRYPTO_TFM_REQ_MASK);
-       err = crypto_cipher_setkey(child, key + keylen/2, keylen/2);
+       err = crypto_cipher_setkey(tweak, key + keylen, keylen);
+       crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(tweak) &
+                                         CRYPTO_TFM_RES_MASK);
        if (err)
                return err;
 
-       crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-                                    CRYPTO_TFM_RES_MASK);
-
+       /* data cipher, uses Key1 i.e. the first half of *key */
        child = ctx->child;
+       crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+       crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+                                        CRYPTO_TFM_REQ_MASK);
+       err = crypto_skcipher_setkey(child, key, keylen);
+       crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+                                         CRYPTO_TFM_RES_MASK);
 
-       /* data cipher, uses Key1 i.e. the first half of *key */
-       crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-       crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-                                      CRYPTO_TFM_REQ_MASK);
-       err = crypto_cipher_setkey(child, key, keylen/2);
-       if (err)
-               return err;
+       return err;
+}
 
-       crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-                                    CRYPTO_TFM_RES_MASK);
+static int post_crypt(struct skcipher_request *req)
+{
+       struct rctx *rctx = skcipher_request_ctx(req);
+       be128 *buf = rctx->ext ?: rctx->buf;
+       struct skcipher_request *subreq;
+       const int bs = XTS_BLOCK_SIZE;
+       struct skcipher_walk w;
+       struct scatterlist *sg;
+       unsigned offset;
+       int err;
 
-       return 0;
-}
+       subreq = &rctx->subreq;
+       err = skcipher_walk_virt(&w, subreq, false);
 
-struct sinfo {
-       be128 *t;
-       struct crypto_tfm *tfm;
-       void (*fn)(struct crypto_tfm *, u8 *, const u8 *);
-};
+       while (w.nbytes) {
+               unsigned int avail = w.nbytes;
+               be128 *wdst;
 
-static inline void xts_round(struct sinfo *s, void *dst, const void *src)
-{
-       be128_xor(dst, s->t, src);              /* PP <- T xor P */
-       s->fn(s->tfm, dst, dst);                /* CC <- E(Key1,PP) */
-       be128_xor(dst, dst, s->t);              /* C <- T xor CC */
+               wdst = w.dst.virt.addr;
+
+               do {
+                       be128_xor(wdst, buf++, wdst);
+                       wdst++;
+               } while ((avail -= bs) >= bs);
+
+               err = skcipher_walk_done(&w, avail);
+       }
+
+       rctx->left -= subreq->cryptlen;
+
+       if (err || !rctx->left)
+               goto out;
+
+       rctx->dst = rctx->dstbuf;
+
+       scatterwalk_done(&w.out, 0, 1);
+       sg = w.out.sg;
+       offset = w.out.offset;
+
+       if (rctx->dst != sg) {
+               rctx->dst[0] = *sg;
+               sg_unmark_end(rctx->dst);
+               scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+       }
+       rctx->dst[0].length -= offset - sg->offset;
+       rctx->dst[0].offset = offset;
+
+out:
+       return err;
 }
 
-static int crypt(struct blkcipher_desc *d,
-                struct blkcipher_walk *w, struct priv *ctx,
-                void (*tw)(struct crypto_tfm *, u8 *, const u8 *),
-                void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+static int pre_crypt(struct skcipher_request *req)
 {
-       int err;
-       unsigned int avail;
+       struct rctx *rctx = skcipher_request_ctx(req);
+       be128 *buf = rctx->ext ?: rctx->buf;
+       struct skcipher_request *subreq;
        const int bs = XTS_BLOCK_SIZE;
-       struct sinfo s = {
-               .tfm = crypto_cipher_tfm(ctx->child),
-               .fn = fn
-       };
-       u8 *wsrc;
-       u8 *wdst;
-
-       err = blkcipher_walk_virt(d, w);
-       if (!w->nbytes)
-               return err;
+       struct skcipher_walk w;
+       struct scatterlist *sg;
+       unsigned cryptlen;
+       unsigned offset;
+       bool more;
+       int err;
 
-       s.t = (be128 *)w->iv;
-       avail = w->nbytes;
+       subreq = &rctx->subreq;
+       cryptlen = subreq->cryptlen;
 
-       wsrc = w->src.virt.addr;
-       wdst = w->dst.virt.addr;
+       more = rctx->left > cryptlen;
+       if (!more)
+               cryptlen = rctx->left;
 
-       /* calculate first value of T */
-       tw(crypto_cipher_tfm(ctx->tweak), w->iv, w->iv);
+       skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
+                                  cryptlen, NULL);
 
-       goto first;
+       err = skcipher_walk_virt(&w, subreq, false);
 
-       for (;;) {
-               do {
-                       gf128mul_x_ble(s.t, s.t);
+       while (w.nbytes) {
+               unsigned int avail = w.nbytes;
+               be128 *wsrc;
+               be128 *wdst;
 
-first:
-                       xts_round(&s, wdst, wsrc);
+               wsrc = w.src.virt.addr;
+               wdst = w.dst.virt.addr;
 
-                       wsrc += bs;
-                       wdst += bs;
+               do {
+                       *buf++ = rctx->t;
+                       be128_xor(wdst++, &rctx->t, wsrc++);
+                       gf128mul_x_ble(&rctx->t, &rctx->t);
                } while ((avail -= bs) >= bs);
 
-               err = blkcipher_walk_done(d, w, avail);
-               if (!w->nbytes)
-                       break;
+               err = skcipher_walk_done(&w, avail);
+       }
+
+       skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
+                                  cryptlen, NULL);
 
-               avail = w->nbytes;
+       if (err || !more)
+               goto out;
 
-               wsrc = w->src.virt.addr;
-               wdst = w->dst.virt.addr;
+       rctx->src = rctx->srcbuf;
+
+       scatterwalk_done(&w.in, 0, 1);
+       sg = w.in.sg;
+       offset = w.in.offset;
+
+       if (rctx->src != sg) {
+               rctx->src[0] = *sg;
+               sg_unmark_end(rctx->src);
+               scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
        }
+       rctx->src[0].length -= offset - sg->offset;
+       rctx->src[0].offset = offset;
 
+out:
        return err;
 }
 
-static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                  struct scatterlist *src, unsigned int nbytes)
+static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
 {
-       struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk w;
+       struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct skcipher_request *subreq;
+       gfp_t gfp;
+
+       subreq = &rctx->subreq;
+       skcipher_request_set_tfm(subreq, ctx->child);
+       skcipher_request_set_callback(subreq, req->base.flags, done, req);
+
+       gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+                                                          GFP_ATOMIC;
+       rctx->ext = NULL;
+
+       subreq->cryptlen = XTS_BUFFER_SIZE;
+       if (req->cryptlen > XTS_BUFFER_SIZE) {
+               subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
+               rctx->ext = kmalloc(subreq->cryptlen, gfp);
+       }
+
+       rctx->src = req->src;
+       rctx->dst = req->dst;
+       rctx->left = req->cryptlen;
 
-       blkcipher_walk_init(&w, dst, src, nbytes);
-       return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt,
-                    crypto_cipher_alg(ctx->child)->cia_encrypt);
+       /* calculate first value of T */
+       crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
+
+       return 0;
 }
 
-static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                  struct scatterlist *src, unsigned int nbytes)
+static void exit_crypt(struct skcipher_request *req)
 {
-       struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-       struct blkcipher_walk w;
+       struct rctx *rctx = skcipher_request_ctx(req);
+
+       rctx->left = 0;
 
-       blkcipher_walk_init(&w, dst, src, nbytes);
-       return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt,
-                    crypto_cipher_alg(ctx->child)->cia_decrypt);
+       if (rctx->ext)
+               kzfree(rctx->ext);
+}
+
+static int do_encrypt(struct skcipher_request *req, int err)
+{
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct skcipher_request *subreq;
+
+       subreq = &rctx->subreq;
+
+       while (!err && rctx->left) {
+               err = pre_crypt(req) ?:
+                     crypto_skcipher_encrypt(subreq) ?:
+                     post_crypt(req);
+
+               if (err == -EINPROGRESS ||
+                   (err == -EBUSY &&
+                    req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return err;
+       }
+
+       exit_crypt(req);
+       return err;
+}
+
+static void encrypt_done(struct crypto_async_request *areq, int err)
+{
+       struct skcipher_request *req = areq->data;
+       struct skcipher_request *subreq;
+       struct rctx *rctx;
+
+       rctx = skcipher_request_ctx(req);
+       subreq = &rctx->subreq;
+       subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+       err = do_encrypt(req, err ?: post_crypt(req));
+       if (rctx->left)
+               return;
+
+       skcipher_request_complete(req, err);
+}
+
+static int encrypt(struct skcipher_request *req)
+{
+       return do_encrypt(req, init_crypt(req, encrypt_done));
+}
+
+static int do_decrypt(struct skcipher_request *req, int err)
+{
+       struct rctx *rctx = skcipher_request_ctx(req);
+       struct skcipher_request *subreq;
+
+       subreq = &rctx->subreq;
+
+       while (!err && rctx->left) {
+               err = pre_crypt(req) ?:
+                     crypto_skcipher_decrypt(subreq) ?:
+                     post_crypt(req);
+
+               if (err == -EINPROGRESS ||
+                   (err == -EBUSY &&
+                    req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return err;
+       }
+
+       exit_crypt(req);
+       return err;
+}
+
+static void decrypt_done(struct crypto_async_request *areq, int err)
+{
+       struct skcipher_request *req = areq->data;
+       struct skcipher_request *subreq;
+       struct rctx *rctx;
+
+       rctx = skcipher_request_ctx(req);
+       subreq = &rctx->subreq;
+       subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+       err = do_decrypt(req, err ?: post_crypt(req));
+       if (rctx->left)
+               return;
+
+       skcipher_request_complete(req, err);
+}
+
+static int decrypt(struct skcipher_request *req)
+{
+       return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
 int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
@@ -233,112 +414,168 @@ first:
 }
 EXPORT_SYMBOL_GPL(xts_crypt);
 
-static int init_tfm(struct crypto_tfm *tfm)
+static int init_tfm(struct crypto_skcipher *tfm)
 {
-       struct crypto_cipher *cipher;
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-       struct priv *ctx = crypto_tfm_ctx(tfm);
-       u32 *flags = &tfm->crt_flags;
-
-       cipher = crypto_spawn_cipher(spawn);
-       if (IS_ERR(cipher))
-               return PTR_ERR(cipher);
-
-       if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) {
-               *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-               crypto_free_cipher(cipher);
-               return -EINVAL;
-       }
+       struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+       struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);
+       struct priv *ctx = crypto_skcipher_ctx(tfm);
+       struct crypto_skcipher *child;
+       struct crypto_cipher *tweak;
 
-       ctx->child = cipher;
+       child = crypto_spawn_skcipher(&ictx->spawn);
+       if (IS_ERR(child))
+               return PTR_ERR(child);
 
-       cipher = crypto_spawn_cipher(spawn);
-       if (IS_ERR(cipher)) {
-               crypto_free_cipher(ctx->child);
-               return PTR_ERR(cipher);
-       }
+       ctx->child = child;
 
-       /* this check isn't really needed, leave it here just in case */
-       if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) {
-               crypto_free_cipher(cipher);
-               crypto_free_cipher(ctx->child);
-               *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-               return -EINVAL;
+       tweak = crypto_alloc_cipher(ictx->name, 0, 0);
+       if (IS_ERR(tweak)) {
+               crypto_free_skcipher(ctx->child);
+               return PTR_ERR(tweak);
        }
 
-       ctx->tweak = cipher;
+       ctx->tweak = tweak;
+
+       crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) +
+                                        sizeof(struct rctx));
 
        return 0;
 }
 
-static void exit_tfm(struct crypto_tfm *tfm)
+static void exit_tfm(struct crypto_skcipher *tfm)
 {
-       struct priv *ctx = crypto_tfm_ctx(tfm);
-       crypto_free_cipher(ctx->child);
+       struct priv *ctx = crypto_skcipher_ctx(tfm);
+
+       crypto_free_skcipher(ctx->child);
        crypto_free_cipher(ctx->tweak);
 }
 
-static struct crypto_instance *alloc(struct rtattr **tb)
+static void free(struct skcipher_instance *inst)
+{
+       crypto_drop_skcipher(skcipher_instance_ctx(inst));
+       kfree(inst);
+}
+
+static int create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct crypto_instance *inst;
-       struct crypto_alg *alg;
+       struct skcipher_instance *inst;
+       struct crypto_attr_type *algt;
+       struct xts_instance_ctx *ctx;
+       struct skcipher_alg *alg;
+       const char *cipher_name;
        int err;
 
-       err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+       algt = crypto_get_attr_type(tb);
+       if (IS_ERR(algt))
+               return PTR_ERR(algt);
+
+       if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+               return -EINVAL;
+
+       cipher_name = crypto_attr_alg_name(tb[1]);
+       if (IS_ERR(cipher_name))
+               return PTR_ERR(cipher_name);
+
+       inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+       if (!inst)
+               return -ENOMEM;
+
+       ctx = skcipher_instance_ctx(inst);
+
+       crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
+       err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0,
+                                  crypto_requires_sync(algt->type,
+                                                       algt->mask));
+       if (err == -ENOENT) {
+               err = -ENAMETOOLONG;
+               if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
+                            cipher_name) >= CRYPTO_MAX_ALG_NAME)
+                       goto err_free_inst;
+
+               err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0,
+                                          crypto_requires_sync(algt->type,
+                                                               algt->mask));
+       }
+
        if (err)
-               return ERR_PTR(err);
+               goto err_free_inst;
 
-       alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-                                 CRYPTO_ALG_TYPE_MASK);
-       if (IS_ERR(alg))
-               return ERR_CAST(alg);
+       alg = crypto_skcipher_spawn_alg(&ctx->spawn);
 
-       inst = crypto_alloc_instance("xts", alg);
-       if (IS_ERR(inst))
-               goto out_put_alg;
+       err = -EINVAL;
+       if (alg->base.cra_blocksize != XTS_BLOCK_SIZE)
+               goto err_drop_spawn;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-       inst->alg.cra_priority = alg->cra_priority;
-       inst->alg.cra_blocksize = alg->cra_blocksize;
+       if (crypto_skcipher_alg_ivsize(alg))
+               goto err_drop_spawn;
 
-       if (alg->cra_alignmask < 7)
-               inst->alg.cra_alignmask = 7;
-       else
-               inst->alg.cra_alignmask = alg->cra_alignmask;
+       err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts",
+                                 &alg->base);
+       if (err)
+               goto err_drop_spawn;
 
-       inst->alg.cra_type = &crypto_blkcipher_type;
+       err = -EINVAL;
+       cipher_name = alg->base.cra_name;
 
-       inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-       inst->alg.cra_blkcipher.min_keysize =
-               2 * alg->cra_cipher.cia_min_keysize;
-       inst->alg.cra_blkcipher.max_keysize =
-               2 * alg->cra_cipher.cia_max_keysize;
+       /* Alas we screwed up the naming so we have to mangle the
+        * cipher name.
+        */
+       if (!strncmp(cipher_name, "ecb(", 4)) {
+               unsigned len;
 
-       inst->alg.cra_ctxsize = sizeof(struct priv);
+               len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name));
+               if (len < 2 || len >= sizeof(ctx->name))
+                       goto err_drop_spawn;
 
-       inst->alg.cra_init = init_tfm;
-       inst->alg.cra_exit = exit_tfm;
+               if (ctx->name[len - 1] != ')')
+                       goto err_drop_spawn;
 
-       inst->alg.cra_blkcipher.setkey = setkey;
-       inst->alg.cra_blkcipher.encrypt = encrypt;
-       inst->alg.cra_blkcipher.decrypt = decrypt;
+               ctx->name[len - 1] = 0;
 
-out_put_alg:
-       crypto_mod_put(alg);
-       return inst;
-}
+               if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+                            "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME)
+                       return -ENAMETOOLONG;
+       } else
+               goto err_drop_spawn;
 
-static void free(struct crypto_instance *inst)
-{
-       crypto_drop_spawn(crypto_instance_ctx(inst));
+       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_priority = alg->base.cra_priority;
+       inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;
+       inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
+                                      (__alignof__(u64) - 1);
+
+       inst->alg.ivsize = XTS_BLOCK_SIZE;
+       inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2;
+       inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2;
+
+       inst->alg.base.cra_ctxsize = sizeof(struct priv);
+
+       inst->alg.init = init_tfm;
+       inst->alg.exit = exit_tfm;
+
+       inst->alg.setkey = setkey;
+       inst->alg.encrypt = encrypt;
+       inst->alg.decrypt = decrypt;
+
+       inst->free = free;
+
+       err = skcipher_register_instance(tmpl, inst);
+       if (err)
+               goto err_drop_spawn;
+
+out:
+       return err;
+
+err_drop_spawn:
+       crypto_drop_skcipher(&ctx->spawn);
+err_free_inst:
        kfree(inst);
+       goto out;
 }
 
 static struct crypto_template crypto_tmpl = {
        .name = "xts",
-       .alloc = alloc,
-       .free = free,
+       .create = create,
        .module = THIS_MODULE,
 };
 
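The rewrite above moves xts.c from the blkcipher walker to the skcipher API and delegates the bulk cipher work to an ecb() subrequest: pre_crypt() saves the current tweak values and XORs them into the data, the child transform encrypts or decrypts the whole buffer, and post_crypt() XORs the saved tweaks back in. The per-block arithmetic is the usual XEX construction, C = E_K1(P xor T) xor T, with T advanced by a multiplication by alpha in GF(2^128) (gf128mul_x_ble() in the loop above). A standalone sketch of that arithmetic, assuming the byte-wise little-endian convention of IEEE P1619 and a caller-supplied block cipher (the helper names are illustrative, not kernel API):

#include <stdint.h>

/* Multiply a 16-byte XTS tweak by alpha (x) in GF(2^128), little-endian. */
static void xts_mul_alpha(uint8_t t[16])
{
	uint8_t carry = t[15] >> 7;	/* bit shifted out of the top byte */

	for (int i = 15; i > 0; i--)
		t[i] = (uint8_t)((t[i] << 1) | (t[i - 1] >> 7));
	t[0] <<= 1;
	if (carry)
		t[0] ^= 0x87;		/* reduction: x^128 = x^7 + x^2 + x + 1 */
}

/* One XTS block: C = E_K1(P ^ T) ^ T, then T *= alpha for the next block. */
static void xts_one_block(uint8_t out[16], const uint8_t in[16],
			  uint8_t tweak[16],
			  void (*encrypt_block)(uint8_t dst[16],
						const uint8_t src[16]))
{
	uint8_t pp[16];

	for (int i = 0; i < 16; i++)
		pp[i] = in[i] ^ tweak[i];
	encrypt_block(out, pp);
	for (int i = 0; i < 16; i++)
		out[i] ^= tweak[i];
	xts_mul_alpha(tweak);
}

The initial T is still produced by encrypting the IV with the second key half (crypto_cipher_encrypt_one() on ctx->tweak in init_crypt()); only the per-block loop now runs through the skcipher walk.
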
index 200dab5..ceff2fc 100644 (file)
@@ -168,7 +168,7 @@ config HW_RANDOM_IXP4XX
 
 config HW_RANDOM_OMAP
        tristate "OMAP Random Number Generator support"
-       depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS
+       depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU
        default HW_RANDOM
        ---help---
          This driver provides kernel-side support for the Random Number
index 0fcc9e6..661c82c 100644 (file)
@@ -48,6 +48,16 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max,
                return 0;
 }
 
+static void atmel_trng_enable(struct atmel_trng *trng)
+{
+       writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+}
+
+static void atmel_trng_disable(struct atmel_trng *trng)
+{
+       writel(TRNG_KEY, trng->base + TRNG_CR);
+}
+
 static int atmel_trng_probe(struct platform_device *pdev)
 {
        struct atmel_trng *trng;
@@ -71,7 +81,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       writel(TRNG_KEY | 1, trng->base + TRNG_CR);
+       atmel_trng_enable(trng);
        trng->rng.name = pdev->name;
        trng->rng.read = atmel_trng_read;
 
@@ -84,7 +94,7 @@ static int atmel_trng_probe(struct platform_device *pdev)
        return 0;
 
 err_register:
-       clk_disable(trng->clk);
+       clk_disable_unprepare(trng->clk);
        return ret;
 }
 
@@ -94,7 +104,7 @@ static int atmel_trng_remove(struct platform_device *pdev)
 
        hwrng_unregister(&trng->rng);
 
-       writel(TRNG_KEY, trng->base + TRNG_CR);
+       atmel_trng_disable(trng);
        clk_disable_unprepare(trng->clk);
 
        return 0;
@@ -105,6 +115,7 @@ static int atmel_trng_suspend(struct device *dev)
 {
        struct atmel_trng *trng = dev_get_drvdata(dev);
 
+       atmel_trng_disable(trng);
        clk_disable_unprepare(trng->clk);
 
        return 0;
@@ -113,8 +124,15 @@ static int atmel_trng_suspend(struct device *dev)
 static int atmel_trng_resume(struct device *dev)
 {
        struct atmel_trng *trng = dev_get_drvdata(dev);
+       int ret;
 
-       return clk_prepare_enable(trng->clk);
+       ret = clk_prepare_enable(trng->clk);
+       if (ret)
+               return ret;
+
+       atmel_trng_enable(trng);
+
+       return 0;
 }
 
 static const struct dev_pm_ops atmel_trng_pm_ops = {
index d2d2c89..f976641 100644 (file)
@@ -92,6 +92,7 @@ static void add_early_randomness(struct hwrng *rng)
        mutex_unlock(&reading_mutex);
        if (bytes_read > 0)
                add_device_randomness(rng_buffer, bytes_read);
+       memset(rng_buffer, 0, size);
 }
 
 static inline void cleanup_rng(struct kref *kref)
@@ -287,6 +288,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf,
                }
        }
 out:
+       memset(rng_buffer, 0, rng_buffer_size());
        return ret ? : err;
 
 out_unlock_reading:
@@ -425,6 +427,7 @@ static int hwrng_fillfn(void *unused)
                /* Outside lock, sure, but y'know: randomness. */
                add_hwgenerator_randomness((void *)rng_fillbuf, rc,
                                           rc * current_quality * 8 >> 10);
+               memset(rng_fillbuf, 0, rng_buffer_size());
        }
        hwrng_fill = NULL;
        return 0;
index 58bef39..119d698 100644 (file)
@@ -110,6 +110,7 @@ static const struct of_device_id meson_rng_of_match[] = {
        { .compatible = "amlogic,meson-rng", },
        {},
 };
+MODULE_DEVICE_TABLE(of, meson_rng_of_match);
 
 static struct platform_driver meson_rng_driver = {
        .probe  = meson_rng_probe,
@@ -121,7 +122,6 @@ static struct platform_driver meson_rng_driver = {
 
 module_platform_driver(meson_rng_driver);
 
-MODULE_ALIAS("platform:meson-rng");
 MODULE_DESCRIPTION("Meson H/W Random Number Generator driver");
 MODULE_AUTHOR("Lawrence Mok <lawrence.mok@amlogic.com>");
 MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>");
index 96fb986..841fee8 100644 (file)
@@ -90,10 +90,6 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait)
        /* calculate max size bytes to transfer back to caller */
        maxsize = min_t(size_t, MAX_HW_FIFO_SIZE, max);
 
-       /* no room for word data */
-       if (maxsize < WORD_SZ)
-               return 0;
-
        ret = clk_prepare_enable(rng->clk);
        if (ret)
                return ret;
index f5c26a5..3ad86fd 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/of_device.h>
 #include <linux/of_address.h>
 #include <linux/interrupt.h>
+#include <linux/clk.h>
 
 #include <asm/io.h>
 
 
 #define OMAP2_RNG_OUTPUT_SIZE                  0x4
 #define OMAP4_RNG_OUTPUT_SIZE                  0x8
+#define EIP76_RNG_OUTPUT_SIZE                  0x10
 
 enum {
-       RNG_OUTPUT_L_REG = 0,
-       RNG_OUTPUT_H_REG,
+       RNG_OUTPUT_0_REG = 0,
+       RNG_OUTPUT_1_REG,
+       RNG_OUTPUT_2_REG,
+       RNG_OUTPUT_3_REG,
        RNG_STATUS_REG,
        RNG_INTMASK_REG,
        RNG_INTACK_REG,
@@ -82,7 +86,7 @@ enum {
 };
 
 static const u16 reg_map_omap2[] = {
-       [RNG_OUTPUT_L_REG]      = 0x0,
+       [RNG_OUTPUT_0_REG]      = 0x0,
        [RNG_STATUS_REG]        = 0x4,
        [RNG_CONFIG_REG]        = 0x28,
        [RNG_REV_REG]           = 0x3c,
@@ -90,8 +94,8 @@ static const u16 reg_map_omap2[] = {
 };
 
 static const u16 reg_map_omap4[] = {
-       [RNG_OUTPUT_L_REG]      = 0x0,
-       [RNG_OUTPUT_H_REG]      = 0x4,
+       [RNG_OUTPUT_0_REG]      = 0x0,
+       [RNG_OUTPUT_1_REG]      = 0x4,
        [RNG_STATUS_REG]        = 0x8,
        [RNG_INTMASK_REG]       = 0xc,
        [RNG_INTACK_REG]        = 0x10,
@@ -106,6 +110,23 @@ static const u16 reg_map_omap4[] = {
        [RNG_SYSCONFIG_REG]     = 0x1FE4,
 };
 
+static const u16 reg_map_eip76[] = {
+       [RNG_OUTPUT_0_REG]      = 0x0,
+       [RNG_OUTPUT_1_REG]      = 0x4,
+       [RNG_OUTPUT_2_REG]      = 0x8,
+       [RNG_OUTPUT_3_REG]      = 0xc,
+       [RNG_STATUS_REG]        = 0x10,
+       [RNG_INTACK_REG]        = 0x10,
+       [RNG_CONTROL_REG]       = 0x14,
+       [RNG_CONFIG_REG]        = 0x18,
+       [RNG_ALARMCNT_REG]      = 0x1c,
+       [RNG_FROENABLE_REG]     = 0x20,
+       [RNG_FRODETUNE_REG]     = 0x24,
+       [RNG_ALARMMASK_REG]     = 0x28,
+       [RNG_ALARMSTOP_REG]     = 0x2c,
+       [RNG_REV_REG]           = 0x7c,
+};
+
 struct omap_rng_dev;
 /**
  * struct omap_rng_pdata - RNG IP block-specific data
@@ -127,6 +148,8 @@ struct omap_rng_dev {
        void __iomem                    *base;
        struct device                   *dev;
        const struct omap_rng_pdata     *pdata;
+       struct hwrng rng;
+       struct clk                      *clk;
 };
 
 static inline u32 omap_rng_read(struct omap_rng_dev *priv, u16 reg)
@@ -140,41 +163,35 @@ static inline void omap_rng_write(struct omap_rng_dev *priv, u16 reg,
        __raw_writel(val, priv->base + priv->pdata->regs[reg]);
 }
 
-static int omap_rng_data_present(struct hwrng *rng, int wait)
+
+static int omap_rng_do_read(struct hwrng *rng, void *data, size_t max,
+                           bool wait)
 {
        struct omap_rng_dev *priv;
-       int data, i;
+       int i, present;
 
        priv = (struct omap_rng_dev *)rng->priv;
 
+       if (max < priv->pdata->data_size)
+               return 0;
+
        for (i = 0; i < 20; i++) {
-               data = priv->pdata->data_present(priv);
-               if (data || !wait)
+               present = priv->pdata->data_present(priv);
+               if (present || !wait)
                        break;
-               /* RNG produces data fast enough (2+ MBit/sec, even
-                * during "rngtest" loads, that these delays don't
-                * seem to trigger.  We *could* use the RNG IRQ, but
-                * that'd be higher overhead ... so why bother?
-                */
+
                udelay(10);
        }
-       return data;
-}
-
-static int omap_rng_data_read(struct hwrng *rng, u32 *data)
-{
-       struct omap_rng_dev *priv;
-       u32 data_size, i;
-
-       priv = (struct omap_rng_dev *)rng->priv;
-       data_size = priv->pdata->data_size;
+       if (!present)
+               return 0;
 
-       for (i = 0; i < data_size / sizeof(u32); i++)
-               data[i] = omap_rng_read(priv, RNG_OUTPUT_L_REG + i);
+       memcpy_fromio(data, priv->base + priv->pdata->regs[RNG_OUTPUT_0_REG],
+                     priv->pdata->data_size);
 
        if (priv->pdata->regs[RNG_INTACK_REG])
                omap_rng_write(priv, RNG_INTACK_REG, RNG_REG_INTACK_RDY_MASK);
-       return data_size;
+
+       return priv->pdata->data_size;
 }
 
 static int omap_rng_init(struct hwrng *rng)
@@ -193,13 +210,6 @@ static void omap_rng_cleanup(struct hwrng *rng)
        priv->pdata->cleanup(priv);
 }
 
-static struct hwrng omap_rng_ops = {
-       .name           = "omap",
-       .data_present   = omap_rng_data_present,
-       .data_read      = omap_rng_data_read,
-       .init           = omap_rng_init,
-       .cleanup        = omap_rng_cleanup,
-};
 
 static inline u32 omap2_rng_data_present(struct omap_rng_dev *priv)
 {
@@ -231,6 +241,38 @@ static inline u32 omap4_rng_data_present(struct omap_rng_dev *priv)
        return omap_rng_read(priv, RNG_STATUS_REG) & RNG_REG_STATUS_RDY;
 }
 
+static int eip76_rng_init(struct omap_rng_dev *priv)
+{
+       u32 val;
+
+       /* Return if RNG is already running. */
+       if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
+               return 0;
+
+       /*  Number of 512 bit blocks of raw Noise Source output data that must
+        *  be processed by either the Conditioning Function or the
+        *  SP 800-90 DRBG 'BC_DF' functionality to yield a 'full entropy'
+        *  output value.
+        */
+       val = 0x5 << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT;
+
+       /* Number of FRO samples that are XOR-ed together into one bit to be
+        * shifted into the main shift register
+        */
+       val |= RNG_CONFIG_MAX_REFIL_CYCLES << RNG_CONFIG_MAX_REFIL_CYCLES_SHIFT;
+       omap_rng_write(priv, RNG_CONFIG_REG, val);
+
+       /* Enable all available FROs */
+       omap_rng_write(priv, RNG_FRODETUNE_REG, 0x0);
+       omap_rng_write(priv, RNG_FROENABLE_REG, RNG_REG_FROENABLE_MASK);
+
+       /* Enable TRNG */
+       val = RNG_CONTROL_ENABLE_TRNG_MASK;
+       omap_rng_write(priv, RNG_CONTROL_REG, val);
+
+       return 0;
+}
+
 static int omap4_rng_init(struct omap_rng_dev *priv)
 {
        u32 val;
@@ -300,6 +342,14 @@ static struct omap_rng_pdata omap4_rng_pdata = {
        .cleanup        = omap4_rng_cleanup,
 };
 
+static struct omap_rng_pdata eip76_rng_pdata = {
+       .regs           = (u16 *)reg_map_eip76,
+       .data_size      = EIP76_RNG_OUTPUT_SIZE,
+       .data_present   = omap4_rng_data_present,
+       .init           = eip76_rng_init,
+       .cleanup        = omap4_rng_cleanup,
+};
+
 static const struct of_device_id omap_rng_of_match[] = {
                {
                        .compatible     = "ti,omap2-rng",
@@ -309,6 +359,10 @@ static const struct of_device_id omap_rng_of_match[] = {
                        .compatible     = "ti,omap4-rng",
                        .data           = &omap4_rng_pdata,
                },
+               {
+                       .compatible     = "inside-secure,safexcel-eip76",
+                       .data           = &eip76_rng_pdata,
+               },
                {},
 };
 MODULE_DEVICE_TABLE(of, omap_rng_of_match);
@@ -327,7 +381,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
        }
        priv->pdata = match->data;
 
-       if (of_device_is_compatible(dev->of_node, "ti,omap4-rng")) {
+       if (of_device_is_compatible(dev->of_node, "ti,omap4-rng") ||
+           of_device_is_compatible(dev->of_node, "inside-secure,safexcel-eip76")) {
                irq = platform_get_irq(pdev, 0);
                if (irq < 0) {
                        dev_err(dev, "%s: error getting IRQ resource - %d\n",
@@ -343,6 +398,16 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv,
                        return err;
                }
                omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK);
+
+               priv->clk = of_clk_get(pdev->dev.of_node, 0);
+               if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER)
+                       return -EPROBE_DEFER;
+               if (!IS_ERR(priv->clk)) {
+                       err = clk_prepare_enable(priv->clk);
+                       if (err)
+                               dev_err(&pdev->dev, "unable to enable the clk, "
+                                                   "err = %d\n", err);
+               }
        }
        return 0;
 }
@@ -372,7 +437,11 @@ static int omap_rng_probe(struct platform_device *pdev)
        if (!priv)
                return -ENOMEM;
 
-       omap_rng_ops.priv = (unsigned long)priv;
+       priv->rng.read = omap_rng_do_read;
+       priv->rng.init = omap_rng_init;
+       priv->rng.cleanup = omap_rng_cleanup;
+
+       priv->rng.priv = (unsigned long)priv;
        platform_set_drvdata(pdev, priv);
        priv->dev = dev;
 
@@ -383,6 +452,12 @@ static int omap_rng_probe(struct platform_device *pdev)
                goto err_ioremap;
        }
 
+       priv->rng.name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
+       if (!priv->rng.name) {
+               ret = -ENOMEM;
+               goto err_ioremap;
+       }
+
        pm_runtime_enable(&pdev->dev);
        ret = pm_runtime_get_sync(&pdev->dev);
        if (ret < 0) {
@@ -394,20 +469,24 @@ static int omap_rng_probe(struct platform_device *pdev)
        ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) :
                                get_omap_rng_device_details(priv);
        if (ret)
-               goto err_ioremap;
+               goto err_register;
 
-       ret = hwrng_register(&omap_rng_ops);
+       ret = hwrng_register(&priv->rng);
        if (ret)
                goto err_register;
 
-       dev_info(&pdev->dev, "OMAP Random Number Generator ver. %02x\n",
+       dev_info(&pdev->dev, "Random Number Generator ver. %02x\n",
                 omap_rng_read(priv, RNG_REV_REG));
 
        return 0;
 
 err_register:
        priv->base = NULL;
+       pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
+
+       if (!IS_ERR(priv->clk))
+               clk_disable_unprepare(priv->clk);
 err_ioremap:
        dev_err(dev, "initialization failed.\n");
        return ret;
@@ -417,13 +496,16 @@ static int omap_rng_remove(struct platform_device *pdev)
 {
        struct omap_rng_dev *priv = platform_get_drvdata(pdev);
 
-       hwrng_unregister(&omap_rng_ops);
+       hwrng_unregister(&priv->rng);
 
        priv->pdata->cleanup(priv);
 
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
+       if (!IS_ERR(priv->clk))
+               clk_disable_unprepare(priv->clk);
+
        return 0;
 }
 
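The omap-rng conversion above drops the legacy data_present()/data_read() pair in favour of the single hwrng ->read() hook and a per-device struct hwrng, which is also what lets the 16-byte EIP76 output size be handled with one memcpy_fromio(). The ->read() contract is: fill at most max bytes into data, optionally poll when wait is true, and return the number of bytes produced (0 if none, or a negative errno). A minimal sketch of such a callback for a hypothetical MMIO device (register names and layout are invented for illustration):

#include <linux/hw_random.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/string.h>

#define FOO_STATUS	0x00	/* hypothetical status register */
#define FOO_DATA_READY	0x1
#define FOO_DATA	0x04	/* hypothetical 32-bit output register */

struct foo_rng {
	void __iomem *base;
	struct hwrng rng;
};

static int foo_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
	struct foo_rng *priv = container_of(rng, struct foo_rng, rng);
	u8 *buf = data;
	size_t copied = 0;

	while (copied + sizeof(u32) <= max) {
		if (!(readl(priv->base + FOO_STATUS) & FOO_DATA_READY)) {
			/* A real driver would bound this poll or use an IRQ. */
			if (!wait || copied)
				break;
			cpu_relax();
			continue;
		}

		u32 v = readl(priv->base + FOO_DATA);

		memcpy(buf + copied, &v, sizeof(v));
		copied += sizeof(v);
	}

	return copied;
}

Registration then reduces to filling priv->rng.name and priv->rng.read and calling hwrng_register(), which is essentially what the reworked probe() above does with priv->rng.
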
index 11dc9b7..9b5e68a 100644 (file)
@@ -62,9 +62,6 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max,
        u32 t;
        unsigned int timeout = RNG_TIMEOUT;
 
-       if (max < 8)
-               return 0;
-
        do {
                t = readl(priv->base + RNGRCNT) & RCNT_MASK;
                if (t == 64) {
index 63ce51d..d9f46b4 100644 (file)
@@ -28,7 +28,6 @@
 static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
        u64 buffer[PLPAR_HCALL_BUFSIZE];
-       size_t size = max < 8 ? max : 8;
        int rc;
 
        rc = plpar_hcall(H_RANDOM, (unsigned long *)buffer);
@@ -36,10 +35,10 @@ static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait
                pr_err_ratelimited("H_RANDOM call failed %d\n", rc);
                return -EIO;
        }
-       memcpy(data, buffer, size);
+       memcpy(data, buffer, 8);
 
        /* The hypervisor interface returns 64 bits */
-       return size;
+       return 8;
 }
 
 /**
index dae1e39..d10b4ae 100644 (file)
@@ -135,8 +135,7 @@ int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size)
        ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4,
                                         &ctx->sa_out_dma_addr, GFP_ATOMIC);
        if (ctx->sa_out == NULL) {
-               dma_free_coherent(ctx->dev->core_dev->device,
-                                 ctx->sa_len * 4,
+               dma_free_coherent(ctx->dev->core_dev->device, size * 4,
                                  ctx->sa_in, ctx->sa_in_dma_addr);
                return -ENOMEM;
        }
index 6c2951b..0ec0440 100644 (file)
@@ -28,6 +28,7 @@
 #define AES_MR_OPMOD_CFB               (0x3 << 12)
 #define AES_MR_OPMOD_CTR               (0x4 << 12)
 #define AES_MR_OPMOD_GCM               (0x5 << 12)
+#define AES_MR_OPMOD_XTS               (0x6 << 12)
 #define AES_MR_LOD                             (0x1 << 15)
 #define AES_MR_CFBS_MASK               (0x7 << 16)
 #define AES_MR_CFBS_128b               (0x0 << 16)
@@ -67,6 +68,9 @@
 #define AES_CTRR       0x98
 #define AES_GCMHR(x)   (0x9c + ((x) * 0x04))
 
+#define AES_TWR(x)     (0xc0 + ((x) * 0x04))
+#define AES_ALPHAR(x)  (0xd0 + ((x) * 0x04))
+
 #define AES_HW_VERSION 0xFC
 
 #endif /* __ATMEL_AES_REGS_H__ */
index e3d40a8..0e3d0d6 100644 (file)
@@ -36,6 +36,7 @@
 #include <crypto/scatterwalk.h>
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
+#include <crypto/xts.h>
 #include <crypto/internal/aead.h>
 #include <linux/platform_data/crypto-atmel.h>
 #include <dt-bindings/dma/at91.h>
@@ -68,6 +69,7 @@
 #define AES_FLAGS_CFB8         (AES_MR_OPMOD_CFB | AES_MR_CFBS_8b)
 #define AES_FLAGS_CTR          AES_MR_OPMOD_CTR
 #define AES_FLAGS_GCM          AES_MR_OPMOD_GCM
+#define AES_FLAGS_XTS          AES_MR_OPMOD_XTS
 
 #define AES_FLAGS_MODE_MASK    (AES_FLAGS_OPMODE_MASK |        \
                                 AES_FLAGS_ENCRYPT |            \
@@ -89,6 +91,7 @@ struct atmel_aes_caps {
        bool                    has_cfb64;
        bool                    has_ctr32;
        bool                    has_gcm;
+       bool                    has_xts;
        u32                     max_burst_size;
 };
 
@@ -135,6 +138,12 @@ struct atmel_aes_gcm_ctx {
        atmel_aes_fn_t          ghash_resume;
 };
 
+struct atmel_aes_xts_ctx {
+       struct atmel_aes_base_ctx       base;
+
+       u32                     key2[AES_KEYSIZE_256 / sizeof(u32)];
+};
+
 struct atmel_aes_reqctx {
        unsigned long           mode;
 };
@@ -282,6 +291,20 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
                snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
                break;
 
+       case AES_TWR(0):
+       case AES_TWR(1):
+       case AES_TWR(2):
+       case AES_TWR(3):
+               snprintf(tmp, sz, "TWR[%u]", (offset - AES_TWR(0)) >> 2);
+               break;
+
+       case AES_ALPHAR(0):
+       case AES_ALPHAR(1):
+       case AES_ALPHAR(2):
+       case AES_ALPHAR(3):
+               snprintf(tmp, sz, "ALPHAR[%u]", (offset - AES_ALPHAR(0)) >> 2);
+               break;
+
        default:
                snprintf(tmp, sz, "0x%02x", offset);
                break;
@@ -317,7 +340,7 @@ static inline void atmel_aes_write(struct atmel_aes_dev *dd,
                char tmp[16];
 
                dev_vdbg(dd->dev, "write 0x%08x into %s\n", value,
-                        atmel_aes_reg_name(offset, tmp));
+                        atmel_aes_reg_name(offset, tmp, sizeof(tmp)));
        }
 #endif /* VERBOSE_DEBUG */
 
@@ -453,15 +476,15 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
        return err;
 }
 
-static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
-                                const u32 *iv)
+static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma,
+                                    const u32 *iv, const u32 *key, int keylen)
 {
        u32 valmr = 0;
 
        /* MR register must be set before IV registers */
-       if (dd->ctx->keylen == AES_KEYSIZE_128)
+       if (keylen == AES_KEYSIZE_128)
                valmr |= AES_MR_KEYSIZE_128;
-       else if (dd->ctx->keylen == AES_KEYSIZE_192)
+       else if (keylen == AES_KEYSIZE_192)
                valmr |= AES_MR_KEYSIZE_192;
        else
                valmr |= AES_MR_KEYSIZE_256;
@@ -478,13 +501,19 @@ static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
 
        atmel_aes_write(dd, AES_MR, valmr);
 
-       atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key,
-                         SIZE_IN_WORDS(dd->ctx->keylen));
+       atmel_aes_write_n(dd, AES_KEYWR(0), key, SIZE_IN_WORDS(keylen));
 
        if (iv && (valmr & AES_MR_OPMOD_MASK) != AES_MR_OPMOD_ECB)
                atmel_aes_write_block(dd, AES_IVR(0), iv);
 }
 
+static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
+                                       const u32 *iv)
+
+{
+       atmel_aes_write_ctrl_key(dd, use_dma, iv,
+                                dd->ctx->key, dd->ctx->keylen);
+}
 
 /* CPU transfer */
 
@@ -1769,6 +1798,137 @@ static struct aead_alg aes_gcm_alg = {
 };
 
 
+/* xts functions */
+
+static inline struct atmel_aes_xts_ctx *
+atmel_aes_xts_ctx_cast(struct atmel_aes_base_ctx *ctx)
+{
+       return container_of(ctx, struct atmel_aes_xts_ctx, base);
+}
+
+static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd);
+
+static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
+{
+       struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx);
+       struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+       struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+       unsigned long flags;
+       int err;
+
+       atmel_aes_set_mode(dd, rctx);
+
+       err = atmel_aes_hw_init(dd);
+       if (err)
+               return atmel_aes_complete(dd, err);
+
+       /* Compute the tweak value from req->info with ecb(aes). */
+       flags = dd->flags;
+       dd->flags &= ~AES_FLAGS_MODE_MASK;
+       dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT);
+       atmel_aes_write_ctrl_key(dd, false, NULL,
+                                ctx->key2, ctx->base.keylen);
+       dd->flags = flags;
+
+       atmel_aes_write_block(dd, AES_IDATAR(0), req->info);
+       return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data);
+}
+
+static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
+{
+       struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+       bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD);
+       u32 tweak[AES_BLOCK_SIZE / sizeof(u32)];
+       static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), };
+       u8 *tweak_bytes = (u8 *)tweak;
+       int i;
+
+       /* Read the computed ciphered tweak value. */
+       atmel_aes_read_block(dd, AES_ODATAR(0), tweak);
+       /*
+        * Hardware quirk:
+        * the order of the ciphered tweak bytes needs to be reversed before
+        * writing them into the TWR registers.
+        */
+       for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
+               u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
+
+               tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
+               tweak_bytes[i] = tmp;
+       }
+
+       /* Process the data. */
+       atmel_aes_write_ctrl(dd, use_dma, NULL);
+       atmel_aes_write_block(dd, AES_TWR(0), tweak);
+       atmel_aes_write_block(dd, AES_ALPHAR(0), one);
+       if (use_dma)
+               return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes,
+                                          atmel_aes_transfer_complete);
+
+       return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes,
+                                  atmel_aes_transfer_complete);
+}
+
+static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                               unsigned int keylen)
+{
+       struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+       int err;
+
+       err = xts_check_key(crypto_ablkcipher_tfm(tfm), key, keylen);
+       if (err)
+               return err;
+
+       memcpy(ctx->base.key, key, keylen/2);
+       memcpy(ctx->key2, key + keylen/2, keylen/2);
+       ctx->base.keylen = keylen/2;
+
+       return 0;
+}
+
+static int atmel_aes_xts_encrypt(struct ablkcipher_request *req)
+{
+       return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT);
+}
+
+static int atmel_aes_xts_decrypt(struct ablkcipher_request *req)
+{
+       return atmel_aes_crypt(req, AES_FLAGS_XTS);
+}
+
+static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm)
+{
+       struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+       ctx->base.start = atmel_aes_xts_start;
+
+       return 0;
+}
+
+static struct crypto_alg aes_xts_alg = {
+       .cra_name               = "xts(aes)",
+       .cra_driver_name        = "atmel-xts-aes",
+       .cra_priority           = ATMEL_AES_PRIORITY,
+       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+       .cra_blocksize          = AES_BLOCK_SIZE,
+       .cra_ctxsize            = sizeof(struct atmel_aes_xts_ctx),
+       .cra_alignmask          = 0xf,
+       .cra_type               = &crypto_ablkcipher_type,
+       .cra_module             = THIS_MODULE,
+       .cra_init               = atmel_aes_xts_cra_init,
+       .cra_exit               = atmel_aes_cra_exit,
+       .cra_u.ablkcipher = {
+               .min_keysize    = 2 * AES_MIN_KEY_SIZE,
+               .max_keysize    = 2 * AES_MAX_KEY_SIZE,
+               .ivsize         = AES_BLOCK_SIZE,
+               .setkey         = atmel_aes_xts_setkey,
+               .encrypt        = atmel_aes_xts_encrypt,
+               .decrypt        = atmel_aes_xts_decrypt,
+       }
+};
+
+
 /* Probe functions */
 
 static int atmel_aes_buff_init(struct atmel_aes_dev *dd)
@@ -1877,6 +2037,9 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
 {
        int i;
 
+       if (dd->caps.has_xts)
+               crypto_unregister_alg(&aes_xts_alg);
+
        if (dd->caps.has_gcm)
                crypto_unregister_aead(&aes_gcm_alg);
 
@@ -1909,8 +2072,16 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
                        goto err_aes_gcm_alg;
        }
 
+       if (dd->caps.has_xts) {
+               err = crypto_register_alg(&aes_xts_alg);
+               if (err)
+                       goto err_aes_xts_alg;
+       }
+
        return 0;
 
+err_aes_xts_alg:
+       crypto_unregister_aead(&aes_gcm_alg);
 err_aes_gcm_alg:
        crypto_unregister_alg(&aes_cfb64_alg);
 err_aes_cfb64_alg:
@@ -1928,6 +2099,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
        dd->caps.has_cfb64 = 0;
        dd->caps.has_ctr32 = 0;
        dd->caps.has_gcm = 0;
+       dd->caps.has_xts = 0;
        dd->caps.max_burst_size = 1;
 
        /* keep only major version number */
@@ -1937,6 +2109,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
                dd->caps.has_cfb64 = 1;
                dd->caps.has_ctr32 = 1;
                dd->caps.has_gcm = 1;
+               dd->caps.has_xts = 1;
                dd->caps.max_burst_size = 4;
                break;
        case 0x200:
@@ -2138,7 +2311,7 @@ aes_dd_err:
 
 static int atmel_aes_remove(struct platform_device *pdev)
 {
-       static struct atmel_aes_dev *aes_dd;
+       struct atmel_aes_dev *aes_dd;
 
        aes_dd = platform_get_drvdata(pdev);
        if (!aes_dd)
index 64bf302..bc0d356 100644 (file)
@@ -74,7 +74,7 @@ config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD
 
 config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
        tristate "Register algorithm implementations with the Crypto API"
-       depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+       depends on CRYPTO_DEV_FSL_CAAM_JR
        default y
        select CRYPTO_AEAD
        select CRYPTO_AUTHENC
@@ -89,7 +89,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 
 config CRYPTO_DEV_FSL_CAAM_AHASH_API
        tristate "Register hash algorithm implementations with Crypto API"
-       depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+       depends on CRYPTO_DEV_FSL_CAAM_JR
        default y
        select CRYPTO_HASH
        help
@@ -101,7 +101,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API
 
 config CRYPTO_DEV_FSL_CAAM_PKC_API
         tristate "Register public key cryptography implementations with Crypto API"
-        depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+        depends on CRYPTO_DEV_FSL_CAAM_JR
         default y
         select CRYPTO_RSA
         help
@@ -113,7 +113,7 @@ config CRYPTO_DEV_FSL_CAAM_PKC_API
 
 config CRYPTO_DEV_FSL_CAAM_RNG_API
        tristate "Register caam device for hwrng API"
-       depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR
+       depends on CRYPTO_DEV_FSL_CAAM_JR
        default y
        select CRYPTO_RNG
        select HW_RANDOM
@@ -134,3 +134,6 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG
        help
          Selecting this will enable printing of various debug
          information in the CAAM driver.
+
+config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
+       def_tristate CRYPTO_DEV_FSL_CAAM_CRYPTO_API
index 08bf551..6554742 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -8,6 +8,7 @@ endif
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o
+obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC) += caamalg_desc.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o
 obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o
index 954a64c..662fe94 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -2,6 +2,7 @@
  * caam - Freescale FSL CAAM support for crypto API
  *
  * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ * Copyright 2016 NXP
  *
  * Based on talitos crypto API driver.
  *
@@ -53,6 +54,7 @@
 #include "error.h"
 #include "sg_sw_sec4.h"
 #include "key_gen.h"
+#include "caamalg_desc.h"
 
 /*
  * crypto alg
@@ -62,8 +64,6 @@
 #define CAAM_MAX_KEY_SIZE              (AES_MAX_KEY_SIZE + \
                                         CTR_RFC3686_NONCE_SIZE + \
                                         SHA512_DIGEST_SIZE * 2)
-/* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */
-#define CAAM_MAX_IV_LENGTH             16
 
 #define AEAD_DESC_JOB_IO_LEN           (DESC_JOB_IO_LEN + CAAM_CMD_SZ * 2)
 #define GCM_DESC_JOB_IO_LEN            (AEAD_DESC_JOB_IO_LEN + \
                                         CAAM_CMD_SZ * 4)
 #define AUTHENC_DESC_JOB_IO_LEN                (AEAD_DESC_JOB_IO_LEN + \
                                         CAAM_CMD_SZ * 5)
 
-/* length of descriptors text */
-#define DESC_AEAD_BASE                 (4 * CAAM_CMD_SZ)
-#define DESC_AEAD_ENC_LEN              (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ)
-#define DESC_AEAD_DEC_LEN              (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
-#define DESC_AEAD_GIVENC_LEN           (DESC_AEAD_ENC_LEN + 9 * CAAM_CMD_SZ)
-
-/* Note: Nonce is counted in enckeylen */
-#define DESC_AEAD_CTR_RFC3686_LEN      (4 * CAAM_CMD_SZ)
-
-#define DESC_AEAD_NULL_BASE            (3 * CAAM_CMD_SZ)
-#define DESC_AEAD_NULL_ENC_LEN         (DESC_AEAD_NULL_BASE + 11 * CAAM_CMD_SZ)
-#define DESC_AEAD_NULL_DEC_LEN         (DESC_AEAD_NULL_BASE + 13 * CAAM_CMD_SZ)
-
-#define DESC_GCM_BASE                  (3 * CAAM_CMD_SZ)
-#define DESC_GCM_ENC_LEN               (DESC_GCM_BASE + 16 * CAAM_CMD_SZ)
-#define DESC_GCM_DEC_LEN               (DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
-
-#define DESC_RFC4106_BASE              (3 * CAAM_CMD_SZ)
-#define DESC_RFC4106_ENC_LEN           (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
-#define DESC_RFC4106_DEC_LEN           (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
-
-#define DESC_RFC4543_BASE              (3 * CAAM_CMD_SZ)
-#define DESC_RFC4543_ENC_LEN           (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
-#define DESC_RFC4543_DEC_LEN           (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ)
-
-#define DESC_ABLKCIPHER_BASE           (3 * CAAM_CMD_SZ)
-#define DESC_ABLKCIPHER_ENC_LEN                (DESC_ABLKCIPHER_BASE + \
-                                        20 * CAAM_CMD_SZ)
-#define DESC_ABLKCIPHER_DEC_LEN                (DESC_ABLKCIPHER_BASE + \
-                                        15 * CAAM_CMD_SZ)
-
 #define DESC_MAX_USED_BYTES            (CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN)
 #define DESC_MAX_USED_LEN              (DESC_MAX_USED_BYTES / CAAM_CMD_SZ)
 
 
 static void dbg_dump_sg(const char *level, const char *prefix_str,
                        int prefix_type, int rowsize, int groupsize,
-                       struct scatterlist *sg, size_t tlen, bool ascii,
-                       bool may_sleep)
+                       struct scatterlist *sg, size_t tlen, bool ascii)
 {
        struct scatterlist *it;
        void *it_page;
@@ -152,7 +120,6 @@ static struct list_head alg_list;
 struct caam_alg_entry {
        int class1_alg_type;
        int class2_alg_type;
-       int alg_op;
        bool rfc3686;
        bool geniv;
 };
@@ -163,52 +130,6 @@ struct caam_aead_alg {
        bool registered;
 };
 
-/* Set DK bit in class 1 operation if shared */
-static inline void append_dec_op1(u32 *desc, u32 type)
-{
-       u32 *jump_cmd, *uncond_jump_cmd;
-
-       /* DK bit is valid only for AES */
-       if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) {
-               append_operation(desc, type | OP_ALG_AS_INITFINAL |
-                                OP_ALG_DECRYPT);
-               return;
-       }
-
-       jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD);
-       append_operation(desc, type | OP_ALG_AS_INITFINAL |
-                        OP_ALG_DECRYPT);
-       uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL);
-       set_jump_tgt_here(desc, jump_cmd);
-       append_operation(desc, type | OP_ALG_AS_INITFINAL |
-                        OP_ALG_DECRYPT | OP_ALG_AAI_DK);
-       set_jump_tgt_here(desc, uncond_jump_cmd);
-}
-
-/*
- * For aead functions, read payload and write payload,
- * both of which are specified in req->src and req->dst
- */
-static inline void aead_append_src_dst(u32 *desc, u32 msg_type)
-{
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH |
-                            KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH);
-}
-
-/*
- * For ablkcipher encrypt and decrypt, read from req->src and
- * write to req->dst
- */
-static inline void ablkcipher_append_src_dst(u32 *desc)
-{
-       append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 |
-                            KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
-}
-
 /*
  * per-session context
  */
@@ -220,147 +141,36 @@ struct caam_ctx {
        dma_addr_t sh_desc_enc_dma;
        dma_addr_t sh_desc_dec_dma;
        dma_addr_t sh_desc_givenc_dma;
-       u32 class1_alg_type;
-       u32 class2_alg_type;
-       u32 alg_op;
        u8 key[CAAM_MAX_KEY_SIZE];
        dma_addr_t key_dma;
-       unsigned int enckeylen;
-       unsigned int split_key_len;
-       unsigned int split_key_pad_len;
+       struct alginfo adata;
+       struct alginfo cdata;
        unsigned int authsize;
 };
 
-static void append_key_aead(u32 *desc, struct caam_ctx *ctx,
-                           int keys_fit_inline, bool is_rfc3686)
-{
-       u32 *nonce;
-       unsigned int enckeylen = ctx->enckeylen;
-
-       /*
-        * RFC3686 specific:
-        *      | ctx->key = {AUTH_KEY, ENC_KEY, NONCE}
-        *      | enckeylen = encryption key size + nonce size
-        */
-       if (is_rfc3686)
-               enckeylen -= CTR_RFC3686_NONCE_SIZE;
-
-       if (keys_fit_inline) {
-               append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
-                                 ctx->split_key_len, CLASS_2 |
-                                 KEY_DEST_MDHA_SPLIT | KEY_ENC);
-               append_key_as_imm(desc, (void *)ctx->key +
-                                 ctx->split_key_pad_len, enckeylen,
-                                 enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       } else {
-               append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 |
-                          KEY_DEST_MDHA_SPLIT | KEY_ENC);
-               append_key(desc, ctx->key_dma + ctx->split_key_pad_len,
-                          enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       }
-
-       /* Load Counter into CONTEXT1 reg */
-       if (is_rfc3686) {
-               nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len +
-                              enckeylen);
-               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
-                                  LDST_CLASS_IND_CCB |
-                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
-               append_move(desc,
-                           MOVE_SRC_OUTFIFO |
-                           MOVE_DEST_CLASS1CTX |
-                           (16 << MOVE_OFFSET_SHIFT) |
-                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
-       }
-}
-
-static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx,
-                                 int keys_fit_inline, bool is_rfc3686)
-{
-       u32 *key_jump_cmd;
-
-       /* Note: Context registers are saved. */
-       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-
-       append_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
-
-       set_jump_tgt_here(desc, key_jump_cmd);
-}
-
 static int aead_null_set_sh_desc(struct crypto_aead *aead)
 {
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
        struct device *jrdev = ctx->jrdev;
-       bool keys_fit_inline = false;
-       u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd;
        u32 *desc;
+       int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN -
+                       ctx->adata.keylen_pad;
 
        /*
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       if (DESC_AEAD_NULL_ENC_LEN + AEAD_DESC_JOB_IO_LEN +
-           ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_AEAD_NULL_ENC_LEN) {
+               ctx->adata.key_inline = true;
+               ctx->adata.key_virt = ctx->key;
+       } else {
+               ctx->adata.key_inline = false;
+               ctx->adata.key_dma = ctx->key_dma;
+       }
 
        /* aead_encrypt shared descriptor */
        desc = ctx->sh_desc_enc;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
-                                 ctx->split_key_len, CLASS_2 |
-                                 KEY_DEST_MDHA_SPLIT | KEY_ENC);
-       else
-               append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 |
-                          KEY_DEST_MDHA_SPLIT | KEY_ENC);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* assoclen + cryptlen = seqinlen */
-       append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* Prepare to read and write cryptlen + assoclen bytes */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /*
-        * MOVE_LEN opcode is not available in all SEC HW revisions,
-        * thus need to do some magic, i.e. self-patch the descriptor
-        * buffer.
-        */
-       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF |
-                                   MOVE_DEST_MATH3 |
-                                   (0x6 << MOVE_LEN_SHIFT));
-       write_move_cmd = append_move(desc, MOVE_SRC_MATH3 |
-                                    MOVE_DEST_DESCBUF |
-                                    MOVE_WAITCOMP |
-                                    (0x8 << MOVE_LEN_SHIFT));
-
-       /* Class 2 operation */
-       append_operation(desc, ctx->class2_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* Read and write cryptlen bytes */
-       aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
-
-       set_move_tgt_here(desc, read_move_cmd);
-       set_move_tgt_here(desc, write_move_cmd);
-       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO |
-                   MOVE_AUX_LS);
-
-       /* Write ICV */
-       append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
-
+       cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -368,84 +178,22 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "aead null enc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        /*
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN +
-           ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
-
-       desc = ctx->sh_desc_dec;
+       if (rem_bytes >= DESC_AEAD_NULL_DEC_LEN) {
+               ctx->adata.key_inline = true;
+               ctx->adata.key_virt = ctx->key;
+       } else {
+               ctx->adata.key_inline = false;
+               ctx->adata.key_dma = ctx->key_dma;
+       }
 
        /* aead_decrypt shared descriptor */
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
-                                 ctx->split_key_len, CLASS_2 |
-                                 KEY_DEST_MDHA_SPLIT | KEY_ENC);
-       else
-               append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 |
-                          KEY_DEST_MDHA_SPLIT | KEY_ENC);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Class 2 operation */
-       append_operation(desc, ctx->class2_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
-
-       /* assoclen + cryptlen = seqoutlen */
-       append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /* Prepare to read and write cryptlen + assoclen bytes */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ);
-
-       /*
-        * MOVE_LEN opcode is not available in all SEC HW revisions,
-        * thus need to do some magic, i.e. self-patch the descriptor
-        * buffer.
-        */
-       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF |
-                                   MOVE_DEST_MATH2 |
-                                   (0x6 << MOVE_LEN_SHIFT));
-       write_move_cmd = append_move(desc, MOVE_SRC_MATH2 |
-                                    MOVE_DEST_DESCBUF |
-                                    MOVE_WAITCOMP |
-                                    (0x8 << MOVE_LEN_SHIFT));
-
-       /* Read and write cryptlen bytes */
-       aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
-
-       /*
-        * Insert a NOP here, since we need at least 4 instructions between
-        * code patching the descriptor buffer and the location being patched.
-        */
-       jump_cmd = append_jump(desc, JUMP_TEST_ALL);
-       set_jump_tgt_here(desc, jump_cmd);
-
-       set_move_tgt_here(desc, read_move_cmd);
-       set_move_tgt_here(desc, write_move_cmd);
-       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO |
-                   MOVE_AUX_LS);
-       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
-
-       /* Load ICV */
-       append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 |
-                            FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
-
+       desc = ctx->sh_desc_dec;
+       cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -453,12 +201,6 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "aead null dec shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        return 0;
 }
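
The branches above are the inline-vs-referenced key decision that replaces the old keys_fit_inline bookkeeping: the shared descriptor body, the job descriptor I/O commands and (optionally) the padded MDHA split key all have to share the 64-word descriptor buffer. A condensed, hypothetical helper — not part of the patch — showing the same logic:

/*
 * Illustration only: inline the padded split key when the descriptor body
 * still fits in what is left of the 64-word buffer, otherwise have the
 * descriptor reference the key by DMA address.
 */
static void caam_pick_auth_key_location(struct caam_ctx *ctx, int desc_len)
{
	int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN -
			ctx->adata.keylen_pad;

	if (rem_bytes >= desc_len) {
		ctx->adata.key_inline = true;	/* key bytes embedded in descriptor */
		ctx->adata.key_virt = ctx->key;
	} else {
		ctx->adata.key_inline = false;	/* descriptor carries a pointer instead */
		ctx->adata.key_dma = ctx->key_dma;
	}
}

With such a helper, the two branches above would reduce to caam_pick_auth_key_location(ctx, DESC_AEAD_NULL_ENC_LEN) and caam_pick_auth_key_location(ctx, DESC_AEAD_NULL_DEC_LEN) ahead of the cnstr_shdsc_aead_null_*() calls.
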
@@ -470,11 +212,11 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
        unsigned int ivsize = crypto_aead_ivsize(aead);
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
        struct device *jrdev = ctx->jrdev;
-       bool keys_fit_inline;
-       u32 geniv, moveiv;
        u32 ctx1_iv_off = 0;
-       u32 *desc;
-       const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) ==
+       u32 *desc, *nonce = NULL;
+       u32 inl_mask;
+       unsigned int data_len[2];
+       const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
                               OP_ALG_AAI_CTR_MOD128);
        const bool is_rfc3686 = alg->caam.rfc3686;
 
@@ -482,7 +224,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
                return 0;
 
        /* NULL encryption / decryption */
-       if (!ctx->enckeylen)
+       if (!ctx->cdata.keylen)
                return aead_null_set_sh_desc(aead);
 
        /*
@@ -497,8 +239,14 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
         * RFC3686 specific:
         *      CONTEXT1[255:128] = {NONCE, IV, COUNTER}
         */
-       if (is_rfc3686)
+       if (is_rfc3686) {
                ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
+               nonce = (u32 *)((void *)ctx->key + ctx->adata.keylen_pad +
+                               ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE);
+       }
+
+       data_len[0] = ctx->adata.keylen_pad;
+       data_len[1] = ctx->cdata.keylen;
 
        if (alg->caam.geniv)
                goto skip_enc;
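
The pointer arithmetic above relies on the layout of the concatenated key blob assembled at setkey time; the sketch below spells that layout out, with a hypothetical accessor that is not part of the driver (note that for rfc3686 the 4-byte nonce is counted in cdata.keylen):

/*
 *   ctx->key
 *   |<-- adata.keylen_pad -->|<---------- cdata.keylen ---------->|
 *   [ MDHA split auth key+pad ][ AES key                 | nonce  ]
 *                                                          4 bytes
 */
static u32 *rfc3686_nonce_ptr(struct caam_ctx *ctx)
{
	return (u32 *)((void *)ctx->key + ctx->adata.keylen_pad +
		       ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE);
}
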
@@ -507,54 +255,29 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_AEAD_ENC_LEN + AUTHENC_DESC_JOB_IO_LEN +
-           ctx->split_key_pad_len + ctx->enckeylen +
-           (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
-           CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
-
-       /* aead_encrypt shared descriptor */
-       desc = ctx->sh_desc_enc;
-
-       /* Note: Context registers are saved. */
-       init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
-
-       /* Class 2 operation */
-       append_operation(desc, ctx->class2_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* Read and write assoclen bytes */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* read assoc before reading payload */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
-                                     FIFOLDST_VLF);
-
-       /* Load Counter into CONTEXT1 reg */
-       if (is_rfc3686)
-               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-                                    LDST_SRCDST_BYTE_CONTEXT |
-                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                     LDST_OFFSET_SHIFT));
+       if (desc_inline_query(DESC_AEAD_ENC_LEN +
+                             (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+                             AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask,
+                             ARRAY_SIZE(data_len)) < 0)
+               return -EINVAL;
 
-       /* Class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
+       if (inl_mask & 1)
+               ctx->adata.key_virt = ctx->key;
+       else
+               ctx->adata.key_dma = ctx->key_dma;
 
-       /* Read and write cryptlen bytes */
-       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-       aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2);
+       if (inl_mask & 2)
+               ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+       else
+               ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
 
-       /* Write ICV */
-       append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
+       ctx->adata.key_inline = !!(inl_mask & 1);
+       ctx->cdata.key_inline = !!(inl_mask & 2);
 
+       /* aead_encrypt shared descriptor */
+       desc = ctx->sh_desc_enc;
+       cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ctx->authsize,
+                              is_rfc3686, nonce, ctx1_iv_off);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
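
The same inl_mask handling recurs for the decryption and givencrypt descriptors below. A hypothetical helper (illustration only) makes the meaning of the two bits explicit: bit 0 is desc_inline_query()'s answer for the padded split authentication key (data_len[0]), bit 1 its answer for the encryption key (data_len[1]).

static void caam_apply_inl_mask(struct caam_ctx *ctx, u32 inl_mask)
{
	ctx->adata.key_inline = !!(inl_mask & 1);
	ctx->cdata.key_inline = !!(inl_mask & 2);

	if (ctx->adata.key_inline)
		ctx->adata.key_virt = ctx->key;
	else
		ctx->adata.key_dma = ctx->key_dma;

	/* the encryption key sits right after the padded split key */
	if (ctx->cdata.key_inline)
		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
	else
		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
}
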
@@ -562,79 +285,36 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "aead enc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
 skip_enc:
        /*
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_AEAD_DEC_LEN + AUTHENC_DESC_JOB_IO_LEN +
-           ctx->split_key_pad_len + ctx->enckeylen +
-           (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
-           CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
-
-       /* aead_decrypt shared descriptor */
-       desc = ctx->sh_desc_dec;
-
-       /* Note: Context registers are saved. */
-       init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
-
-       /* Class 2 operation */
-       append_operation(desc, ctx->class2_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+       if (desc_inline_query(DESC_AEAD_DEC_LEN +
+                             (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+                             AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask,
+                             ARRAY_SIZE(data_len)) < 0)
+               return -EINVAL;
 
-       /* Read and write assoclen bytes */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       if (alg->caam.geniv)
-               append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize);
+       if (inl_mask & 1)
+               ctx->adata.key_virt = ctx->key;
        else
-               append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* read assoc before reading payload */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
-                            KEY_VLF);
+               ctx->adata.key_dma = ctx->key_dma;
 
-       if (alg->caam.geniv) {
-               append_seq_load(desc, ivsize, LDST_CLASS_1_CCB |
-                               LDST_SRCDST_BYTE_CONTEXT |
-                               (ctx1_iv_off << LDST_OFFSET_SHIFT));
-               append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO |
-                           (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize);
-       }
-
-       /* Load Counter into CONTEXT1 reg */
-       if (is_rfc3686)
-               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-                                    LDST_SRCDST_BYTE_CONTEXT |
-                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                     LDST_OFFSET_SHIFT));
-
-       /* Choose operation */
-       if (ctr_mode)
-               append_operation(desc, ctx->class1_alg_type |
-                                OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT);
+       if (inl_mask & 2)
+               ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
        else
-               append_dec_op1(desc, ctx->class1_alg_type);
-
-       /* Read and write cryptlen bytes */
-       append_math_add(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-       aead_append_src_dst(desc, FIFOLD_TYPE_MSG);
+               ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
 
-       /* Load ICV */
-       append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 |
-                            FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
+       ctx->adata.key_inline = !!(inl_mask & 1);
+       ctx->cdata.key_inline = !!(inl_mask & 2);
 
+       /* aead_decrypt shared descriptor */
+       desc = ctx->sh_desc_dec;
+       cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize,
+                              ctx->authsize, alg->caam.geniv, is_rfc3686,
+                              nonce, ctx1_iv_off);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -642,11 +322,6 @@ skip_enc:
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "aead dec shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        if (!alg->caam.geniv)
                goto skip_givenc;
@@ -655,93 +330,30 @@ skip_enc:
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_AEAD_GIVENC_LEN + AUTHENC_DESC_JOB_IO_LEN +
-           ctx->split_key_pad_len + ctx->enckeylen +
-           (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
-           CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
-
-       /* aead_givencrypt shared descriptor */
-       desc = ctx->sh_desc_enc;
-
-       /* Note: Context registers are saved. */
-       init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
-
-       if (is_rfc3686)
-               goto copy_iv;
-
-       /* Generate IV */
-       geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
-               NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-               NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT);
-       append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
-                           LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-       append_move(desc, MOVE_WAITCOMP |
-                   MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX |
-                   (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
-                   (ivsize << MOVE_LEN_SHIFT));
-       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
-
-copy_iv:
-       /* Copy IV to class 1 context */
-       append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO |
-                   (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
-                   (ivsize << MOVE_LEN_SHIFT));
-
-       /* Return to encryption */
-       append_operation(desc, ctx->class2_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* Read and write assoclen bytes */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* ivsize + cryptlen = seqoutlen - authsize */
-       append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* read assoc before reading payload */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
-                            KEY_VLF);
-
-       /* Copy iv from outfifo to class 2 fifo */
-       moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 |
-                NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT);
-       append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB |
-                           LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-       append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB |
-                           LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM);
-
-       /* Load Counter into CONTEXT1 reg */
-       if (is_rfc3686)
-               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-                                    LDST_SRCDST_BYTE_CONTEXT |
-                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                     LDST_OFFSET_SHIFT));
-
-       /* Class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* Will write ivsize + cryptlen */
-       append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       if (desc_inline_query(DESC_AEAD_GIVENC_LEN +
+                             (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+                             AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask,
+                             ARRAY_SIZE(data_len)) < 0)
+               return -EINVAL;
 
-       /* Not need to reload iv */
-       append_seq_fifo_load(desc, ivsize,
-                            FIFOLD_CLASS_SKIP);
+       if (inl_mask & 1)
+               ctx->adata.key_virt = ctx->key;
+       else
+               ctx->adata.key_dma = ctx->key_dma;
 
-       /* Will read cryptlen */
-       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-       aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2);
+       if (inl_mask & 2)
+               ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+       else
+               ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
 
-       /* Write ICV */
-       append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
+       ctx->adata.key_inline = !!(inl_mask & 1);
+       ctx->cdata.key_inline = !!(inl_mask & 2);
 
+       /* aead_givencrypt shared descriptor */
+       desc = ctx->sh_desc_enc;
+       cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize,
+                                 ctx->authsize, is_rfc3686, nonce,
+                                 ctx1_iv_off);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -749,11 +361,6 @@ copy_iv:
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "aead givenc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
 skip_givenc:
        return 0;
@@ -774,12 +381,11 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
        struct device *jrdev = ctx->jrdev;
-       bool keys_fit_inline = false;
-       u32 *key_jump_cmd, *zero_payload_jump_cmd,
-           *zero_assoc_jump_cmd1, *zero_assoc_jump_cmd2;
        u32 *desc;
+       int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
+                       ctx->cdata.keylen;
 
-       if (!ctx->enckeylen || !ctx->authsize)
+       if (!ctx->cdata.keylen || !ctx->authsize)
                return 0;
 
        /*
@@ -787,82 +393,16 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
         * Job Descriptor and Shared Descriptor
         * must fit into the 64-word Descriptor h/w Buffer
         */
-       if (DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN +
-           ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_GCM_ENC_LEN) {
+               ctx->cdata.key_inline = true;
+               ctx->cdata.key_virt = ctx->key;
+       } else {
+               ctx->cdata.key_inline = false;
+               ctx->cdata.key_dma = ctx->key_dma;
+       }
 
        desc = ctx->sh_desc_enc;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* skip key loading if they are loaded due to sharing */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD | JUMP_COND_SELF);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                                 ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       else
-               append_key(desc, ctx->key_dma, ctx->enckeylen,
-                          CLASS_1 | KEY_DEST_CLASS_REG);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* if assoclen + cryptlen is ZERO, skip to ICV write */
-       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-       zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
-                                                JUMP_COND_MATH_Z);
-
-       /* if assoclen is ZERO, skip reading the assoc data */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
-                                                JUMP_COND_MATH_Z);
-
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* cryptlen = seqinlen - assoclen */
-       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
-
-       /* if cryptlen is ZERO jump to zero-payload commands */
-       zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
-                                           JUMP_COND_MATH_Z);
-
-       /* read assoc data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
-       set_jump_tgt_here(desc, zero_assoc_jump_cmd1);
-
-       append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* write encrypted data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
-
-       /* read payload data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
-
-       /* jump the zero-payload commands */
-       append_jump(desc, JUMP_TEST_ALL | 2);
-
-       /* zero-payload commands */
-       set_jump_tgt_here(desc, zero_payload_jump_cmd);
-
-       /* read assoc data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
-
-       /* There is no input data */
-       set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
-
-       /* write ICV */
-       append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
-
+       cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -870,80 +410,21 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "gcm enc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        /*
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_GCM_DEC_LEN + GCM_DESC_JOB_IO_LEN +
-           ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_GCM_DEC_LEN) {
+               ctx->cdata.key_inline = true;
+               ctx->cdata.key_virt = ctx->key;
+       } else {
+               ctx->cdata.key_inline = false;
+               ctx->cdata.key_dma = ctx->key_dma;
+       }
 
        desc = ctx->sh_desc_dec;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* skip key loading if they are loaded due to sharing */
-       key_jump_cmd = append_jump(desc, JUMP_JSL |
-                                  JUMP_TEST_ALL | JUMP_COND_SHRD |
-                                  JUMP_COND_SELF);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                                 ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       else
-               append_key(desc, ctx->key_dma, ctx->enckeylen,
-                          CLASS_1 | KEY_DEST_CLASS_REG);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
-
-       /* if assoclen is ZERO, skip reading the assoc data */
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-       zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
-                                                JUMP_COND_MATH_Z);
-
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* read assoc data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
-
-       set_jump_tgt_here(desc, zero_assoc_jump_cmd1);
-
-       /* cryptlen = seqoutlen - assoclen */
-       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /* jump to zero-payload command if cryptlen is zero */
-       zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
-                                           JUMP_COND_MATH_Z);
-
-       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /* store encrypted data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
-
-       /* read payload data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
-
-       /* zero-payload command */
-       set_jump_tgt_here(desc, zero_payload_jump_cmd);
-
-       /* read ICV */
-       append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 |
-                            FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
-
+       cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -951,11 +432,6 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "gcm dec shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        return 0;
 }
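
gcm_set_sh_desc() — like the rfc4106/rfc4543 and authenc variants around it — now follows one pattern: pick the key location, let a cnstr_shdsc_*() routine from the new caamalg_desc module lay out the shared descriptor, then map it for the job ring. A hedged sketch of that final step, with a hypothetical helper name:

static int caam_map_shdesc(struct device *jrdev, u32 *desc, dma_addr_t *dma)
{
	/* shared descriptors are written once by the CPU, then read by CAAM */
	*dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE);
	if (dma_mapping_error(jrdev, *dma)) {
		dev_err(jrdev, "unable to map shared descriptor\n");
		return -ENOMEM;
	}
	return 0;
}
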
@@ -974,11 +450,11 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
        struct device *jrdev = ctx->jrdev;
-       bool keys_fit_inline = false;
-       u32 *key_jump_cmd;
        u32 *desc;
+       int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
+                       ctx->cdata.keylen;
 
-       if (!ctx->enckeylen || !ctx->authsize)
+       if (!ctx->cdata.keylen || !ctx->authsize)
                return 0;
 
        /*
@@ -986,62 +462,16 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
         * Job Descriptor and Shared Descriptor
         * must fit into the 64-word Descriptor h/w Buffer
         */
-       if (DESC_RFC4106_ENC_LEN + GCM_DESC_JOB_IO_LEN +
-           ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_RFC4106_ENC_LEN) {
+               ctx->cdata.key_inline = true;
+               ctx->cdata.key_virt = ctx->key;
+       } else {
+               ctx->cdata.key_inline = false;
+               ctx->cdata.key_dma = ctx->key_dma;
+       }
 
        desc = ctx->sh_desc_enc;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Skip key loading if it is loaded due to sharing */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                                 ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       else
-               append_key(desc, ctx->key_dma, ctx->enckeylen,
-                          CLASS_1 | KEY_DEST_CLASS_REG);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* Read assoc data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
-
-       /* Skip IV */
-       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
-
-       /* Will read cryptlen bytes */
-       append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
-
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* cryptlen = seqoutlen - assoclen */
-       append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* Write encrypted data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
-
-       /* Read payload data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
-
-       /* Write ICV */
-       append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
-
+       cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -1049,73 +479,21 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "rfc4106 enc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        /*
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_RFC4106_DEC_LEN + DESC_JOB_IO_LEN +
-           ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_RFC4106_DEC_LEN) {
+               ctx->cdata.key_inline = true;
+               ctx->cdata.key_virt = ctx->key;
+       } else {
+               ctx->cdata.key_inline = false;
+               ctx->cdata.key_dma = ctx->key_dma;
+       }
 
        desc = ctx->sh_desc_dec;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Skip key loading if it is loaded due to sharing */
-       key_jump_cmd = append_jump(desc, JUMP_JSL |
-                                  JUMP_TEST_ALL | JUMP_COND_SHRD);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                                 ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       else
-               append_key(desc, ctx->key_dma, ctx->enckeylen,
-                          CLASS_1 | KEY_DEST_CLASS_REG);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
-
-       append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
-       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
-
-       /* Read assoc data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
-
-       /* Skip IV */
-       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
-
-       /* Will read cryptlen bytes */
-       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
-
-       /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
-
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-       /* Will write cryptlen bytes */
-       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /* Store payload data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
-
-       /* Read encrypted data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
-                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
-
-       /* Read ICV */
-       append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 |
-                            FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
-
+       cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -1123,11 +501,6 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "rfc4106 dec shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        return 0;
 }
@@ -1147,12 +520,11 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
        struct device *jrdev = ctx->jrdev;
-       bool keys_fit_inline = false;
-       u32 *key_jump_cmd;
-       u32 *read_move_cmd, *write_move_cmd;
        u32 *desc;
+       int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
+                       ctx->cdata.keylen;
 
-       if (!ctx->enckeylen || !ctx->authsize)
+       if (!ctx->cdata.keylen || !ctx->authsize)
                return 0;
 
        /*
@@ -1160,61 +532,16 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
         * Job Descriptor and Shared Descriptor
         * must fit into the 64-word Descriptor h/w Buffer
         */
-       if (DESC_RFC4543_ENC_LEN + GCM_DESC_JOB_IO_LEN +
-           ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_RFC4543_ENC_LEN) {
+               ctx->cdata.key_inline = true;
+               ctx->cdata.key_virt = ctx->key;
+       } else {
+               ctx->cdata.key_inline = false;
+               ctx->cdata.key_dma = ctx->key_dma;
+       }
 
        desc = ctx->sh_desc_enc;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Skip key loading if it is loaded due to sharing */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                                 ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       else
-               append_key(desc, ctx->key_dma, ctx->enckeylen,
-                          CLASS_1 | KEY_DEST_CLASS_REG);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* assoclen + cryptlen = seqinlen */
-       append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /*
-        * MOVE_LEN opcode is not available in all SEC HW revisions,
-        * thus need to do some magic, i.e. self-patch the descriptor
-        * buffer.
-        */
-       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
-                                   (0x6 << MOVE_LEN_SHIFT));
-       write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
-                                    (0x8 << MOVE_LEN_SHIFT));
-
-       /* Will read assoclen + cryptlen bytes */
-       append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* Will write assoclen + cryptlen bytes */
-       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* Read and write assoclen + cryptlen bytes */
-       aead_append_src_dst(desc, FIFOLD_TYPE_AAD);
-
-       set_move_tgt_here(desc, read_move_cmd);
-       set_move_tgt_here(desc, write_move_cmd);
-       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-       /* Move payload data to OFIFO */
-       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO);
-
-       /* Write ICV */
-       append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
-
+       cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -1222,77 +549,21 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "rfc4543 enc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        /*
         * Job Descriptor and Shared Descriptors
         * must all fit into the 64-word Descriptor h/w Buffer
         */
-       keys_fit_inline = false;
-       if (DESC_RFC4543_DEC_LEN + GCM_DESC_JOB_IO_LEN +
-           ctx->enckeylen <= CAAM_DESC_BYTES_MAX)
-               keys_fit_inline = true;
+       if (rem_bytes >= DESC_RFC4543_DEC_LEN) {
+               ctx->cdata.key_inline = true;
+               ctx->cdata.key_virt = ctx->key;
+       } else {
+               ctx->cdata.key_inline = false;
+               ctx->cdata.key_dma = ctx->key_dma;
+       }
 
        desc = ctx->sh_desc_dec;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Skip key loading if it is loaded due to sharing */
-       key_jump_cmd = append_jump(desc, JUMP_JSL |
-                                  JUMP_TEST_ALL | JUMP_COND_SHRD);
-       if (keys_fit_inline)
-               append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                                 ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-       else
-               append_key(desc, ctx->key_dma, ctx->enckeylen,
-                          CLASS_1 | KEY_DEST_CLASS_REG);
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Class 1 operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
-
-       /* assoclen + cryptlen = seqoutlen */
-       append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /*
-        * MOVE_LEN opcode is not available in all SEC HW revisions,
-        * thus need to do some magic, i.e. self-patch the descriptor
-        * buffer.
-        */
-       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
-                                   (0x6 << MOVE_LEN_SHIFT));
-       write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
-                                    (0x8 << MOVE_LEN_SHIFT));
-
-       /* Will read assoclen + cryptlen bytes */
-       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /* Will write assoclen + cryptlen bytes */
-       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
-
-       /* Store payload data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
-
-       /* In-snoop assoclen + cryptlen data */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF |
-                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1);
-
-       set_move_tgt_here(desc, read_move_cmd);
-       set_move_tgt_here(desc, write_move_cmd);
-       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-       /* Move payload data to OFIFO */
-       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO);
-       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
-
-       /* Read ICV */
-       append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 |
-                            FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
-
+       cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -1300,11 +571,6 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "rfc4543 dec shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
        return 0;
 }
@@ -1320,19 +586,9 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc,
        return 0;
 }
 
-static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in,
-                             u32 authkeylen)
-{
-       return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
-                              ctx->split_key_pad_len, key_in, authkeylen,
-                              ctx->alg_op);
-}
-
 static int aead_setkey(struct crypto_aead *aead,
                               const u8 *key, unsigned int keylen)
 {
-       /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
-       static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
        struct device *jrdev = ctx->jrdev;
        struct crypto_authenc_keys keys;
@@ -1341,33 +597,25 @@ static int aead_setkey(struct crypto_aead *aead,
        if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
                goto badkey;
 
-       /* Pick class 2 key length from algorithm submask */
-       ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
-                                     OP_ALG_ALGSEL_SHIFT] * 2;
-       ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16);
-
-       if (ctx->split_key_pad_len + keys.enckeylen > CAAM_MAX_KEY_SIZE)
-               goto badkey;
-
 #ifdef DEBUG
        printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n",
               keys.authkeylen + keys.enckeylen, keys.enckeylen,
               keys.authkeylen);
-       printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n",
-              ctx->split_key_len, ctx->split_key_pad_len);
        print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 #endif
 
-       ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen);
+       ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, keys.authkey,
+                           keys.authkeylen, CAAM_MAX_KEY_SIZE -
+                           keys.enckeylen);
        if (ret) {
                goto badkey;
        }
 
        /* append the encryption key after the auth split key */
-       memcpy(ctx->key + ctx->split_key_pad_len, keys.enckey, keys.enckeylen);
+       memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen);
 
-       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len +
+       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->adata.keylen_pad +
                                      keys.enckeylen, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
@@ -1376,14 +624,14 @@ static int aead_setkey(struct crypto_aead *aead,
 #ifdef DEBUG
        print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
-                      ctx->split_key_pad_len + keys.enckeylen, 1);
+                      ctx->adata.keylen_pad + keys.enckeylen, 1);
 #endif
 
-       ctx->enckeylen = keys.enckeylen;
+       ctx->cdata.keylen = keys.enckeylen;
 
        ret = aead_set_sh_desc(aead);
        if (ret) {
-               dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len +
+               dma_unmap_single(jrdev, ctx->key_dma, ctx->adata.keylen_pad +
                                 keys.enckeylen, DMA_TO_DEVICE);
        }
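
For reference, after this change ctx->key holds the padded MDHA split key (adata.keylen_pad bytes) with the raw encryption key copied directly behind it, and the sum of the two lengths is what gets DMA-mapped. Below is a minimal standalone sketch of that layout; the sizes and names are hypothetical, not driver values.

/* Illustrative only: models the ctx->key layout built in aead_setkey(),
 * where the encryption key sits right after the padded auth split key and
 * the combined length is handed to dma_map_single().
 */
#include <stdio.h>
#include <string.h>

#define SPLIT_KEY_PAD_LEN 64   /* hypothetical adata.keylen_pad */
#define ENC_KEY_LEN       16   /* hypothetical keys.enckeylen (AES-128) */

int main(void)
{
        unsigned char ctx_key[SPLIT_KEY_PAD_LEN + ENC_KEY_LEN] = { 0 };
        unsigned char enc_key[ENC_KEY_LEN] = { 0 };  /* stands in for keys.enckey */

        /* gen_split_key() would have filled ctx_key[0..SPLIT_KEY_PAD_LEN) */
        memcpy(ctx_key + SPLIT_KEY_PAD_LEN, enc_key, ENC_KEY_LEN);

        printf("DMA-mapped key length = %zu bytes\n", sizeof(ctx_key));
        return 0;
}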
 
@@ -1412,11 +660,11 @@ static int gcm_setkey(struct crypto_aead *aead,
                dev_err(jrdev, "unable to map key i/o memory\n");
                return -ENOMEM;
        }
-       ctx->enckeylen = keylen;
+       ctx->cdata.keylen = keylen;
 
        ret = gcm_set_sh_desc(aead);
        if (ret) {
-               dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen,
+               dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen,
                                 DMA_TO_DEVICE);
        }
 
@@ -1444,9 +692,9 @@ static int rfc4106_setkey(struct crypto_aead *aead,
         * The last four bytes of the key material are used as the salt value
         * in the nonce. Update the AES key length.
         */
-       ctx->enckeylen = keylen - 4;
+       ctx->cdata.keylen = keylen - 4;
 
-       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen,
+       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->cdata.keylen,
                                      DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
@@ -1455,7 +703,7 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 
        ret = rfc4106_set_sh_desc(aead);
        if (ret) {
-               dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen,
+               dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen,
                                 DMA_TO_DEVICE);
        }
 
@@ -1483,9 +731,9 @@ static int rfc4543_setkey(struct crypto_aead *aead,
         * The last four bytes of the key material are used as the salt value
         * in the nonce. Update the AES key length.
         */
-       ctx->enckeylen = keylen - 4;
+       ctx->cdata.keylen = keylen - 4;
 
-       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen,
+       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->cdata.keylen,
                                      DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
@@ -1494,7 +742,7 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 
        ret = rfc4543_set_sh_desc(aead);
        if (ret) {
-               dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen,
+               dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen,
                                 DMA_TO_DEVICE);
        }
 
@@ -1505,21 +753,18 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                             const u8 *key, unsigned int keylen)
 {
        struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
-       struct ablkcipher_tfm *crt = &ablkcipher->base.crt_ablkcipher;
        struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablkcipher);
        const char *alg_name = crypto_tfm_alg_name(tfm);
        struct device *jrdev = ctx->jrdev;
-       int ret = 0;
-       u32 *key_jump_cmd;
+       unsigned int ivsize = crypto_ablkcipher_ivsize(ablkcipher);
        u32 *desc;
-       u8 *nonce;
-       u32 geniv;
        u32 ctx1_iv_off = 0;
-       const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) ==
+       const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
                               OP_ALG_AAI_CTR_MOD128);
        const bool is_rfc3686 = (ctr_mode &&
                                 (strstr(alg_name, "rfc3686") != NULL));
 
+       memcpy(ctx->key, key, keylen);
 #ifdef DEBUG
        print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -1542,60 +787,20 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                keylen -= CTR_RFC3686_NONCE_SIZE;
        }
 
-       memcpy(ctx->key, key, keylen);
        ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen,
                                      DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
                return -ENOMEM;
        }
-       ctx->enckeylen = keylen;
+       ctx->cdata.keylen = keylen;
+       ctx->cdata.key_virt = ctx->key;
+       ctx->cdata.key_inline = true;
 
        /* ablkcipher_encrypt shared descriptor */
        desc = ctx->sh_desc_enc;
-       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-
-       /* Load class1 key only */
-       append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                         ctx->enckeylen, CLASS_1 |
-                         KEY_DEST_CLASS_REG);
-
-       /* Load nonce into CONTEXT1 reg */
-       if (is_rfc3686) {
-               nonce = (u8 *)key + keylen;
-               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
-                                  LDST_CLASS_IND_CCB |
-                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
-               append_move(desc, MOVE_WAITCOMP |
-                           MOVE_SRC_OUTFIFO |
-                           MOVE_DEST_CLASS1CTX |
-                           (16 << MOVE_OFFSET_SHIFT) |
-                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
-       }
-
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Load iv */
-       append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT |
-                       LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT));
-
-       /* Load counter into CONTEXT1 reg */
-       if (is_rfc3686)
-               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-                                    LDST_SRCDST_BYTE_CONTEXT |
-                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                     LDST_OFFSET_SHIFT));
-
-       /* Load operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* Perform operation */
-       ablkcipher_append_src_dst(desc);
-
+       cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686,
+                                    ctx1_iv_off);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
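
A note on the rfc3686 handling above: the key blob supplied to setkey is the raw CTR key followed by a 4-byte nonce, so keylen is trimmed by CTR_RFC3686_NONCE_SIZE before mapping, and the removed inline descriptor code read the nonce from key + keylen. A small standalone sketch of that split, with a hypothetical AES-128 key size:

/* Illustrative only: splits an rfc3686(ctr(aes)) key blob into cipher key
 * and trailing 4-byte nonce, mirroring the keylen adjustment above.
 */
#include <stdio.h>

#define CTR_RFC3686_NONCE_SIZE 4

int main(void)
{
        unsigned char blob[16 + CTR_RFC3686_NONCE_SIZE] = { 0 }; /* key || nonce */
        unsigned int keylen = sizeof(blob);
        const unsigned char *nonce;

        keylen -= CTR_RFC3686_NONCE_SIZE;
        nonce = blob + keylen;

        printf("cipher keylen = %u, nonce at offset %u\n",
               keylen, (unsigned int)(nonce - blob));
        return 0;
}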
@@ -1603,61 +808,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "ablkcipher enc shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
+
        /* ablkcipher_decrypt shared descriptor */
        desc = ctx->sh_desc_dec;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-
-       /* Load class1 key only */
-       append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                         ctx->enckeylen, CLASS_1 |
-                         KEY_DEST_CLASS_REG);
-
-       /* Load nonce into CONTEXT1 reg */
-       if (is_rfc3686) {
-               nonce = (u8 *)key + keylen;
-               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
-                                  LDST_CLASS_IND_CCB |
-                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
-               append_move(desc, MOVE_WAITCOMP |
-                           MOVE_SRC_OUTFIFO |
-                           MOVE_DEST_CLASS1CTX |
-                           (16 << MOVE_OFFSET_SHIFT) |
-                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
-       }
-
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* load IV */
-       append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT |
-                       LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT));
-
-       /* Load counter into CONTEXT1 reg */
-       if (is_rfc3686)
-               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-                                    LDST_SRCDST_BYTE_CONTEXT |
-                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                     LDST_OFFSET_SHIFT));
-
-       /* Choose operation */
-       if (ctr_mode)
-               append_operation(desc, ctx->class1_alg_type |
-                                OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT);
-       else
-               append_dec_op1(desc, ctx->class1_alg_type);
-
-       /* Perform operation */
-       ablkcipher_append_src_dst(desc);
-
+       cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686,
+                                    ctx1_iv_off);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc,
                                              desc_bytes(desc),
                                              DMA_TO_DEVICE);
@@ -1666,76 +821,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                return -ENOMEM;
        }
 
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "ablkcipher dec shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
        /* ablkcipher_givencrypt shared descriptor */
        desc = ctx->sh_desc_givenc;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-
-       /* Load class1 key only */
-       append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                         ctx->enckeylen, CLASS_1 |
-                         KEY_DEST_CLASS_REG);
-
-       /* Load Nonce into CONTEXT1 reg */
-       if (is_rfc3686) {
-               nonce = (u8 *)key + keylen;
-               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
-                                  LDST_CLASS_IND_CCB |
-                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
-               append_move(desc, MOVE_WAITCOMP |
-                           MOVE_SRC_OUTFIFO |
-                           MOVE_DEST_CLASS1CTX |
-                           (16 << MOVE_OFFSET_SHIFT) |
-                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
-       }
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /* Generate IV */
-       geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
-               NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-               NFIFOENTRY_PTYPE_RND | (crt->ivsize << NFIFOENTRY_DLEN_SHIFT);
-       append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
-                           LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
-       append_move(desc, MOVE_WAITCOMP |
-                   MOVE_SRC_INFIFO |
-                   MOVE_DEST_CLASS1CTX |
-                   (crt->ivsize << MOVE_LEN_SHIFT) |
-                   (ctx1_iv_off << MOVE_OFFSET_SHIFT));
-       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
-
-       /* Copy generated IV to memory */
-       append_seq_store(desc, crt->ivsize,
-                        LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
-                        (ctx1_iv_off << LDST_OFFSET_SHIFT));
-
-       /* Load Counter into CONTEXT1 reg */
-       if (is_rfc3686)
-               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
-                                    LDST_SRCDST_BYTE_CONTEXT |
-                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
-                                     LDST_OFFSET_SHIFT));
-
-       if (ctx1_iv_off)
-               append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP |
-                           (1 << JUMP_OFFSET_SHIFT));
-
-       /* Load operation */
-       append_operation(desc, ctx->class1_alg_type |
-                        OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
-
-       /* Perform operation */
-       ablkcipher_append_src_dst(desc);
-
+       cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686,
+                                       ctx1_iv_off);
        ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc,
                                                 desc_bytes(desc),
                                                 DMA_TO_DEVICE);
@@ -1743,14 +832,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
 
-       return ret;
+       return 0;
 }
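
The net effect of the rewritten ablkcipher_setkey() is that each shared descriptor now follows the same short sequence: fill ctx->cdata (keylen, key_virt, key_inline), call the matching cnstr_shdsc_ablkcipher_*() constructor from caamalg_desc.c, DMA-map the result, and fail the setkey on a mapping error. Below is a rough standalone sketch of that flow; build_desc(), map_desc() and setkey_like() are invented stand-ins, not driver or CAAM API functions.

/* Illustrative only: the build-then-map loop the refactored setkey paths
 * reduce to. No real CAAM calls are made here.
 */
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

static void build_desc(const char *name)  /* stand-in for cnstr_shdsc_*() */
{
        printf("constructing %s shared descriptor\n", name);
}

static bool map_desc(const char *name)    /* stand-in for dma_map_single() */
{
        printf("mapping %s shared descriptor\n", name);
        return true;
}

static int setkey_like(void)
{
        static const char * const descs[] = { "encrypt", "decrypt", "givencrypt" };
        unsigned int i;

        for (i = 0; i < sizeof(descs) / sizeof(descs[0]); i++) {
                build_desc(descs[i]);
                if (!map_desc(descs[i]))
                        return -ENOMEM;   /* mirrors the early returns above */
        }
        return 0;
}

int main(void)
{
        printf("setkey-like flow returned %d\n", setkey_like());
        return 0;
}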
 
 static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
@@ -1758,8 +841,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 {
        struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
        struct device *jrdev = ctx->jrdev;
-       u32 *key_jump_cmd, *desc;
-       __be64 sector_size = cpu_to_be64(512);
+       u32 *desc;
 
        if (keylen != 2 * AES_MIN_KEY_SIZE  && keylen != 2 * AES_MAX_KEY_SIZE) {
                crypto_ablkcipher_set_flags(ablkcipher,
@@ -1774,88 +856,23 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                dev_err(jrdev, "unable to map key i/o memory\n");
                return -ENOMEM;
        }
-       ctx->enckeylen = keylen;
+       ctx->cdata.keylen = keylen;
+       ctx->cdata.key_virt = ctx->key;
+       ctx->cdata.key_inline = true;
 
        /* xts_ablkcipher_encrypt shared descriptor */
        desc = ctx->sh_desc_enc;
-       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-
-       /* Load class1 keys only */
-       append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                         ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-
-       /* Load sector size with index 40 bytes (0x28) */
-       append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT |
-                  LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8);
-       append_data(desc, (void *)&sector_size, 8);
-
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /*
-        * create sequence for loading the sector index
-        * Upper 8B of IV - will be used as sector index
-        * Lower 8B of IV - will be discarded
-        */
-       append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-                  LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8);
-       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
-
-       /* Load operation */
-       append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL |
-                        OP_ALG_ENCRYPT);
-
-       /* Perform operation */
-       ablkcipher_append_src_dst(desc);
-
+       cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata);
        ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
                                              DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
 
        /* xts_ablkcipher_decrypt shared descriptor */
        desc = ctx->sh_desc_dec;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
-       /* Skip if already shared */
-       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                  JUMP_COND_SHRD);
-
-       /* Load class1 key only */
-       append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen,
-                         ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-
-       /* Load sector size with index 40 bytes (0x28) */
-       append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT |
-                  LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8);
-       append_data(desc, (void *)&sector_size, 8);
-
-       set_jump_tgt_here(desc, key_jump_cmd);
-
-       /*
-        * create sequence for loading the sector index
-        * Upper 8B of IV - will be used as sector index
-        * Lower 8B of IV - will be discarded
-        */
-       append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-                  LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8);
-       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
-
-       /* Load operation */
-       append_dec_op1(desc, ctx->class1_alg_type);
-
-       /* Perform operation */
-       ablkcipher_append_src_dst(desc);
-
+       cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata);
        ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
                                              DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) {
@@ -1864,31 +881,22 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
                dev_err(jrdev, "unable to map shared descriptor\n");
                return -ENOMEM;
        }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR,
-                      "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
-#endif
 
        return 0;
 }
 
 /*
  * aead_edesc - s/w-extended aead descriptor
- * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist
  * @src_nents: number of segments in input scatterlist
  * @dst_nents: number of segments in output scatterlist
- * @iv_dma: dma address of iv for checking continuity and link table
- * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
  * @sec4_sg_dma: bus physical mapped address of h/w link table
+ * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
  */
 struct aead_edesc {
-       int assoc_nents;
        int src_nents;
        int dst_nents;
-       dma_addr_t iv_dma;
        int sec4_sg_bytes;
        dma_addr_t sec4_sg_dma;
        struct sec4_sg_entry *sec4_sg;
@@ -1900,9 +908,9 @@ struct aead_edesc {
  * @src_nents: number of segments in input scatterlist
  * @dst_nents: number of segments in output scatterlist
  * @iv_dma: dma address of iv for checking continuity and link table
- * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE)
  * @sec4_sg_bytes: length of dma mapped sec4_sg space
  * @sec4_sg_dma: bus physical mapped address of h/w link table
+ * @sec4_sg: pointer to h/w link table
  * @hw_desc: the h/w job descriptor followed by any referenced link tables
  */
 struct ablkcipher_edesc {
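
The two kernel-doc blocks above describe the trimmed s/w-extended descriptors; in both, hw_desc[] is the trailing flexible array holding the h/w job descriptor, and, as the @hw_desc description notes, any referenced link tables follow it in the same allocation. Below is a standalone layout sketch with simplified, hypothetical field types and sizes:

/* Illustrative only: shows where hw_desc[] starts and how large a combined
 * edesc + job descriptor + link table allocation would be. Field types and
 * sizes are simplified stand-ins, not the driver's definitions.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct fake_edesc {
        int src_nents;
        int dst_nents;
        int sec4_sg_bytes;
        uint64_t sec4_sg_dma;     /* stand-in for dma_addr_t */
        void *sec4_sg;
        uint32_t hw_desc[];       /* job descriptor, then link tables */
};

int main(void)
{
        size_t desc_words = 16;   /* hypothetical job descriptor length */
        size_t sg_bytes   = 64;   /* hypothetical sec4_sg_bytes */
        size_t alloc = sizeof(struct fake_edesc) +
                       desc_words * sizeof(uint32_t) + sg_bytes;

        printf("hw_desc offset = %zu, total allocation = %zu bytes\n",
               offsetof(struct fake_edesc, hw_desc), alloc);
        return 0;
}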
@@ -2019,8 +1027,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
        dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-       edesc = (struct ablkcipher_edesc *)((char *)desc -
-                offsetof(struct ablkcipher_edesc, hw_desc));
+       edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]);
 
        if (err)
                caam_jr_strstatus(jrdev, err);
@@ -2031,7 +1038,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
                       edesc->src_nents > 1 ? 100 : ivsize, 1);
        dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
                    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
+                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 
        ablkcipher_unmap(jrdev, edesc, req);
@@ -2052,8 +1059,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
        dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-       edesc = (struct ablkcipher_edesc *)((char *)desc -
-                offsetof(struct ablkcipher_edesc, hw_desc));
+       edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]);
        if (err)
                caam_jr_strstatus(jrdev, err);
 
@@ -2063,7 +1069,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
                       ivsize, 1);
        dbg_dump_sg(KERN_ERR, "dst    @"__stringify(__LINE__)": ",
                    DUMP_PREFIX_ADDRESS, 16, 4, req->dst,
-                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true);
+                   edesc->dst_nents > 1 ? 100 : req->nbytes, 1);
 #endif
 
        ablkcipher_unmap(jrdev, edesc, req);
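
The two completion-callback hunks above replace open-coded offsetof() arithmetic with container_of(); both recover the enclosing edesc from the embedded hw_desc pointer handed back by the job ring. Below is a standalone demonstration that the two forms yield the same pointer (container_of is defined locally here and the struct is simplified):

/* Illustrative only: container_of() versus the open-coded pointer
 * arithmetic it replaces; both recover the same enclosing object.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct edesc {
        int src_nents;
        uint32_t hw_desc[4];
};

int main(void)
{
        struct edesc e = { .src_nents = 1 };
        uint32_t *desc = &e.hw_desc[0];   /* what the callback receives */

        struct edesc *via_offsetof = (struct edesc *)((char *)desc -
                                      offsetof(struct edesc, hw_desc));
        struct edesc *via_container = container_of(desc, struct edesc, hw_desc[0]);

        printf("same object: %s\n", via_offsetof == via_container ? "yes" : "no");
        return 0;
}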
@@ -2157,7 +1163,7 @@ static void init_gcm_job(struct aead_request *req,
                         FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | 12 | last);
        /* Append Salt */
        if (!generic_gcm)
-               append_data(desc, ctx->key + ctx->enckeylen, 4);
+               append_data(desc, ctx->key + ctx->cdata.keylen, 4);
        /* Append IV */
        append_data(desc, req->iv, ivsize);
        /* End of blank commands */
@@ -2172,7 +1178,7 @@ static void init_authenc_job(struct aead_request *req,
                                                 struct caam_aead_alg, aead);
        unsigned int ivsize = crypto_aead_ivsize(aead);
        struct caam_ctx *ctx = crypto_aead_ctx(aead);
-       const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) ==
+       const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
                               OP_ALG_AAI_CTR_MOD128);
        const bool is_rfc3686 = alg->caam.rfc3686;
        u32 *desc = edesc->hw_desc;
@@ -2218,15 +1224,13 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr,
        int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
-       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
        printk(KERN_ERR "asked=%d, nbytes%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes);
        dbg_dump_sg(KERN_ERR, "src    @"__stringify(__LINE__)": ",
                    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-                   edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
+                   edesc->src_nents ? 100 : req->nbytes, 1);
 #endif
 
        len = desc_len(sh_desc);
@@ -2278,14 +1282,12 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr,
        int len, sec4_sg_index = 0;
 
 #ifdef DEBUG
-       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, req->info,
                       ivsize, 1);
        dbg_dump_sg(KERN_ERR, "src    @" __stringify(__LINE__) ": ",
                    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-                   edesc->src_nents ? 100 : req->nbytes, 1, may_sleep);
+                   edesc->src_nents ? 100 : req->nbytes, 1);
 #endif
 
        len = desc_len(sh_desc);
@@ -2344,10 +1346,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 
        /* Check if data are contiguous. */
        all_contig = !src_nents;
-       if (!all_contig) {
-               src_nents = src_nents ? : 1;
+       if (!all_contig)
                sec4_sg_len = src_nents;
-       }
 
        sec4_sg_len += dst_nents;
 
@@ -2556,11 +1556,9 @@ static int aead_decrypt(struct aead_request *req)
        int ret = 0;
 
 #ifdef DEBUG
-       bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG |
-                                             CRYPTO_TFM_REQ_MAY_SLEEP)) != 0);
        dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ",
                    DUMP_PREFIX_ADDRESS, 16, 4, req->src,
-                   req->assoclen + req->cryptlen, 1, may_sleep);
+                   req->assoclen + req->cryptlen, 1);
 #endif
 
        /* allocate extended descriptor */
@@ -2618,16 +1616,33 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
        if (likely(req->src == req->dst)) {
                sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
                                 DMA_BIDIRECTIONAL);
+               if (unlikely(!sgc)) {
+                       dev_err(jrdev, "unable to map source\n");
+                       return ERR_PTR(-ENOMEM);
+               }
        } else {
                sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
                                 DMA_TO_DEVICE);
+               if (unlikely(!sgc)) {
+                       dev_err(jrdev, "unable to map source\n");
+                       return ERR_PTR(-ENOMEM);
+               }
+
                sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
                                 DMA_FROM_DEVICE);
+               if (unlikely(!sgc)) {
+                       dev_err(jrdev, "unable to map destination\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents ? : 1,
+                                    DMA_TO_DEVICE);
+                       return ERR_PTR(-ENOMEM);
+               }
        }
 
        iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, iv_dma)) {
                dev_err(jrdev, "unable to map IV\n");
+               caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+                          0, 0, 0);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -2647,6 +1662,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
                        GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
+               caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+                          iv_dma, ivsize, 0, 0);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -2673,6 +1690,9 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request
                                            sec4_sg_bytes, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                dev_err(jrdev, "unable to map S/G table\n");
+               caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+                          iv_dma, ivsize, 0, 0);
+               kfree(edesc);
                return ERR_PTR(-ENOMEM);
        }
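
The error paths added in these hunks (and in the matching givencrypt allocation below) give every failure point an explicit unwind step: whatever was DMA-mapped or allocated before the failure is released before returning -ENOMEM. Below is a minimal standalone sketch of that staged unwinding; the acquire/release helpers are invented stand-ins for the mapping and allocation calls.

/* Illustrative only: staged cleanup on failure, as in the error paths added
 * to ablkcipher_edesc_alloc(). No real DMA or allocation happens here.
 */
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>

static bool map_src_dst(void)   { return true; }
static bool map_iv(void)        { return true; }
static bool alloc_edesc(void)   { return false; }  /* simulate a failure */
static void unmap_iv(void)      { puts("unmap IV"); }
static void unmap_src_dst(void) { puts("unmap src/dst"); }

static int edesc_alloc_like(void)
{
        if (!map_src_dst())
                return -ENOMEM;
        if (!map_iv()) {
                unmap_src_dst();
                return -ENOMEM;
        }
        if (!alloc_edesc()) {     /* mirrors the kzalloc / S/G map failures */
                unmap_iv();
                unmap_src_dst();
                return -ENOMEM;
        }
        return 0;
}

int main(void)
{
        printf("edesc_alloc_like() = %d\n", edesc_alloc_like());
        return 0;
}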
 
@@ -2794,11 +1814,26 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
        if (likely(req->src == req->dst)) {
                sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
                                 DMA_BIDIRECTIONAL);
+               if (unlikely(!sgc)) {
+                       dev_err(jrdev, "unable to map source\n");
+                       return ERR_PTR(-ENOMEM);
+               }
        } else {
                sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1,
                                 DMA_TO_DEVICE);
+               if (unlikely(!sgc)) {
+                       dev_err(jrdev, "unable to map source\n");
+                       return ERR_PTR(-ENOMEM);
+               }
+
                sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1,
                                 DMA_FROM_DEVICE);
+               if (unlikely(!sgc)) {
+                       dev_err(jrdev, "unable to map destination\n");
+                       dma_unmap_sg(jrdev, req->src, src_nents ? : 1,
+                                    DMA_TO_DEVICE);
+                       return ERR_PTR(-ENOMEM);
+               }
        }
 
        /*
@@ -2808,6 +1843,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
        iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, iv_dma)) {
                dev_err(jrdev, "unable to map IV\n");
+               caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0,
+                          0, 0, 0);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -2823,6 +1860,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
                        GFP_DMA | flags);
        if (!edesc) {
                dev_err(jrdev, "could not allocate extended descriptor\n");
+               caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+                          iv_dma, ivsize, 0, 0);
                return ERR_PTR(-ENOMEM);
        }
 
@@ -2850,6 +1889,9 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc(
                                            sec4_sg_bytes, DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) {
                dev_err(jrdev, "unable to map S/G table\n");
+               caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents,
+                          iv_dma, ivsize, 0, 0);
+               kfree(edesc);
                return ERR_PTR(-ENOMEM);
        }
        edesc->iv_dma = iv_dma;
@@ -2916,7 +1958,6 @@ struct caam_alg_template {
        } template_u;
        u32 class1_alg_type;
        u32 class2_alg_type;
-       u32 alg_op;
 };
 
 static struct caam_alg_template driver_algs[] = {
@@ -3101,7 +2142,6 @@ static struct caam_aead_alg driver_aeads[] = {
                .caam = {
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3123,7 +2163,6 @@ static struct caam_aead_alg driver_aeads[] = {
                .caam = {
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3145,7 +2184,6 @@ static struct caam_aead_alg driver_aeads[] = {
                .caam = {
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3167,7 +2205,6 @@ static struct caam_aead_alg driver_aeads[] = {
                .caam = {
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3189,7 +2226,6 @@ static struct caam_aead_alg driver_aeads[] = {
                .caam = {
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3211,7 +2247,6 @@ static struct caam_aead_alg driver_aeads[] = {
                .caam = {
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3233,7 +2268,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3256,7 +2290,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3279,7 +2312,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3302,7 +2334,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3325,7 +2356,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3348,7 +2378,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3371,7 +2400,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3394,7 +2422,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3417,7 +2444,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3440,7 +2466,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3463,7 +2488,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3486,7 +2510,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3509,7 +2532,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                }
        },
        {
@@ -3532,7 +2554,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                }
        },
@@ -3556,7 +2577,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3580,7 +2600,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3604,7 +2623,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3628,7 +2646,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3652,7 +2669,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3676,7 +2692,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3700,7 +2715,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3724,7 +2738,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3748,7 +2761,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3772,7 +2784,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3795,7 +2806,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3818,7 +2828,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3841,7 +2850,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3864,7 +2872,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3887,7 +2894,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3910,7 +2916,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3933,7 +2938,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -3956,7 +2960,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -3979,7 +2982,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -4002,7 +3004,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -4025,7 +3026,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                },
        },
        {
@@ -4048,7 +3048,6 @@ static struct caam_aead_alg driver_aeads[] = {
                        .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                        .geniv = true,
                },
        },
@@ -4073,7 +3072,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                },
        },
@@ -4098,7 +3096,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_MD5 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                        .geniv = true,
                },
@@ -4124,7 +3121,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                },
        },
@@ -4149,7 +3145,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA1 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                        .geniv = true,
                },
@@ -4175,7 +3170,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                },
        },
@@ -4200,7 +3194,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA224 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                        .geniv = true,
                },
@@ -4226,7 +3219,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                },
        },
@@ -4251,7 +3243,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA256 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                        .geniv = true,
                },
@@ -4277,7 +3268,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                },
        },
@@ -4302,7 +3292,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA384 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                        .geniv = true,
                },
@@ -4328,7 +3317,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                },
        },
@@ -4353,7 +3341,6 @@ static struct caam_aead_alg driver_aeads[] = {
                                           OP_ALG_AAI_CTR_MOD128,
                        .class2_alg_type = OP_ALG_ALGSEL_SHA512 |
                                           OP_ALG_AAI_HMAC_PRECOMP,
-                       .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
                        .rfc3686 = true,
                        .geniv = true,
                },
@@ -4375,9 +3362,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam)
        }
 
        /* copy descriptor header template value */
-       ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
-       ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam->class2_alg_type;
-       ctx->alg_op = OP_TYPE_CLASS2_ALG | caam->alg_op;
+       ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type;
+       ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type;
 
        return 0;
 }
@@ -4420,7 +3406,7 @@ static void caam_exit_common(struct caam_ctx *ctx)
        if (ctx->key_dma &&
            !dma_mapping_error(ctx->jrdev, ctx->key_dma))
                dma_unmap_single(ctx->jrdev, ctx->key_dma,
-                                ctx->enckeylen + ctx->split_key_pad_len,
+                                ctx->cdata.keylen + ctx->adata.keylen_pad,
                                 DMA_TO_DEVICE);
 
        caam_jr_free(ctx->jrdev);
@@ -4498,7 +3484,6 @@ static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template
 
        t_alg->caam.class1_alg_type = template->class1_alg_type;
        t_alg->caam.class2_alg_type = template->class2_alg_type;
-       t_alg->caam.alg_op = template->alg_op;
 
        return t_alg;
 }
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
new file mode 100644 (file)
index 0000000..f3f48c1
--- /dev/null
@@ -0,0 +1,1306 @@
+/*
+ * Shared descriptors for aead, ablkcipher algorithms
+ *
+ * Copyright 2016 NXP
+ */
+
+#include "compat.h"
+#include "desc_constr.h"
+#include "caamalg_desc.h"
+
+/*
+ * For aead functions, read payload and write payload,
+ * both of which are specified in req->src and req->dst
+ */
+static inline void aead_append_src_dst(u32 *desc, u32 msg_type)
+{
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH |
+                            KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH);
+}
+
+/* Set DK bit in class 1 operation if shared */
+static inline void append_dec_op1(u32 *desc, u32 type)
+{
+       u32 *jump_cmd, *uncond_jump_cmd;
+
+       /* DK bit is valid only for AES */
+       if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) {
+               append_operation(desc, type | OP_ALG_AS_INITFINAL |
+                                OP_ALG_DECRYPT);
+               return;
+       }
+
+       jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD);
+       append_operation(desc, type | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT);
+       uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL);
+       set_jump_tgt_here(desc, jump_cmd);
+       append_operation(desc, type | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT | OP_ALG_AAI_DK);
+       set_jump_tgt_here(desc, uncond_jump_cmd);
+}
+
+/**
+ * cnstr_shdsc_aead_null_encap - IPSec ESP encapsulation shared descriptor
+ *                               (non-protocol) with no (null) encryption.
+ * @desc: pointer to buffer used for descriptor construction
+ * @adata: pointer to authentication transform definitions. Note that since a
+ *         split key is to be used, the size of the split key itself is
+ *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
+ *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ *
+ * Note: Requires an MDHA split key.
+ */
+void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
+                                unsigned int icvsize)
+{
+       u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+       if (adata->key_inline)
+               append_key_as_imm(desc, adata->key_virt, adata->keylen_pad,
+                                 adata->keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT |
+                                 KEY_ENC);
+       else
+               append_key(desc, adata->key_dma, adata->keylen, CLASS_2 |
+                          KEY_DEST_MDHA_SPLIT | KEY_ENC);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* assoclen + cryptlen = seqinlen */
+       append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* Prepare to read and write cryptlen + assoclen bytes */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /*
+        * MOVE_LEN opcode is not available in all SEC HW revisions,
+        * thus need to do some magic, i.e. self-patch the descriptor
+        * buffer.
+        */
+       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF |
+                                   MOVE_DEST_MATH3 |
+                                   (0x6 << MOVE_LEN_SHIFT));
+       write_move_cmd = append_move(desc, MOVE_SRC_MATH3 |
+                                    MOVE_DEST_DESCBUF |
+                                    MOVE_WAITCOMP |
+                                    (0x8 << MOVE_LEN_SHIFT));
+
+       /* Class 2 operation */
+       append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Read and write cryptlen bytes */
+       aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
+
+       set_move_tgt_here(desc, read_move_cmd);
+       set_move_tgt_here(desc, write_move_cmd);
+       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO |
+                   MOVE_AUX_LS);
+
+       /* Write ICV */
+       append_seq_store(desc, icvsize, LDST_CLASS_2_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "aead null enc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_aead_null_encap);
+
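For illustration, a minimal caller sketch (not part of the diff; the function name, the SHA-1 choice and the authsize parameter are assumptions). It fills the class 2 transform info and builds the descriptor into a caller-provided buffer of at least DESC_AEAD_NULL_ENC_LEN bytes (see caamalg_desc.h further down):

    /* Sketch: build an authenc(hmac(sha1),null) encap shared descriptor.
     * adata->key_virt/key_dma, keylen and keylen_pad are assumed to have
     * been filled by the setkey path with the MDHA split key. */
    static void example_null_enc_shdesc(u32 *sh_desc, struct alginfo *adata,
                                        unsigned int authsize)
    {
            adata->algtype = OP_TYPE_CLASS2_ALG | OP_ALG_ALGSEL_SHA1 |
                             OP_ALG_AAI_HMAC_PRECOMP;
            cnstr_shdsc_aead_null_encap(sh_desc, adata, authsize);
    }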
+/**
+ * cnstr_shdsc_aead_null_decap - IPSec ESP decapsulation shared descriptor
+ *                               (non-protocol) with no (null) decryption.
+ * @desc: pointer to buffer used for descriptor construction
+ * @adata: pointer to authentication transform definitions. Note that since a
+ *         split key is to be used, the size of the split key itself is
+ *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
+ *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ *
+ * Note: Requires an MDHA split key.
+ */
+void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata,
+                                unsigned int icvsize)
+{
+       u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd, *jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+       if (adata->key_inline)
+               append_key_as_imm(desc, adata->key_virt, adata->keylen_pad,
+                                 adata->keylen, CLASS_2 |
+                                 KEY_DEST_MDHA_SPLIT | KEY_ENC);
+       else
+               append_key(desc, adata->key_dma, adata->keylen, CLASS_2 |
+                          KEY_DEST_MDHA_SPLIT | KEY_ENC);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Class 2 operation */
+       append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+       /* assoclen + cryptlen = seqoutlen */
+       append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /* Prepare to read and write cryptlen + assoclen bytes */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ);
+
+       /*
+        * MOVE_LEN opcode is not available in all SEC HW revisions,
+        * thus need to do some magic, i.e. self-patch the descriptor
+        * buffer.
+        */
+       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF |
+                                   MOVE_DEST_MATH2 |
+                                   (0x6 << MOVE_LEN_SHIFT));
+       write_move_cmd = append_move(desc, MOVE_SRC_MATH2 |
+                                    MOVE_DEST_DESCBUF |
+                                    MOVE_WAITCOMP |
+                                    (0x8 << MOVE_LEN_SHIFT));
+
+       /* Read and write cryptlen bytes */
+       aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
+
+       /*
+        * Insert a NOP here, since we need at least 4 instructions between
+        * code patching the descriptor buffer and the location being patched.
+        */
+       jump_cmd = append_jump(desc, JUMP_TEST_ALL);
+       set_jump_tgt_here(desc, jump_cmd);
+
+       set_move_tgt_here(desc, read_move_cmd);
+       set_move_tgt_here(desc, write_move_cmd);
+       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO |
+                   MOVE_AUX_LS);
+       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+
+       /* Load ICV */
+       append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 |
+                            FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "aead null dec shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_aead_null_decap);
+
+static void init_sh_desc_key_aead(u32 * const desc,
+                                 struct alginfo * const cdata,
+                                 struct alginfo * const adata,
+                                 const bool is_rfc3686, u32 *nonce)
+{
+       u32 *key_jump_cmd;
+       unsigned int enckeylen = cdata->keylen;
+
+       /* Note: Context registers are saved. */
+       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+
+       /*
+        * RFC3686 specific:
+        *      | key = {AUTH_KEY, ENC_KEY, NONCE}
+        *      | enckeylen = encryption key size + nonce size
+        */
+       if (is_rfc3686)
+               enckeylen -= CTR_RFC3686_NONCE_SIZE;
+
+       if (adata->key_inline)
+               append_key_as_imm(desc, adata->key_virt, adata->keylen_pad,
+                                 adata->keylen, CLASS_2 |
+                                 KEY_DEST_MDHA_SPLIT | KEY_ENC);
+       else
+               append_key(desc, adata->key_dma, adata->keylen, CLASS_2 |
+                          KEY_DEST_MDHA_SPLIT | KEY_ENC);
+
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, enckeylen,
+                                 enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, enckeylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+
+       /* Load Counter into CONTEXT1 reg */
+       if (is_rfc3686) {
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               append_move(desc,
+                           MOVE_SRC_OUTFIFO |
+                           MOVE_DEST_CLASS1CTX |
+                           (16 << MOVE_OFFSET_SHIFT) |
+                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
+       }
+
+       set_jump_tgt_here(desc, key_jump_cmd);
+}
+
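The nonce pointer handed to this helper is derived by the caller from the tail of the class 1 key blob; below is a hedged sketch of one such derivation, assuming cdata->keylen still includes the nonce as the RFC3686 comment above states (helper name is illustrative):

    /* Sketch only: key blob = {ENC_KEY, NONCE}, so the nonce is the last
     * CTR_RFC3686_NONCE_SIZE bytes of the class 1 key material. */
    static u32 *example_rfc3686_nonce(struct alginfo *cdata)
    {
            return (u32 *)((u8 *)cdata->key_virt + cdata->keylen -
                           CTR_RFC3686_NONCE_SIZE);
    }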
+/**
+ * cnstr_shdsc_aead_encap - IPSec ESP encapsulation shared descriptor
+ *                          (non-protocol).
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
+ *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
+ * @adata: pointer to authentication transform definitions. Note that since a
+ *         split key is to be used, the size of the split key itself is
+ *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
+ *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
+ * @nonce: pointer to rfc3686 nonce
+ * @ctx1_iv_off: IV offset in CONTEXT1 register
+ *
+ * Note: Requires an MDHA split key.
+ */
+void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata,
+                           struct alginfo *adata, unsigned int icvsize,
+                           const bool is_rfc3686, u32 *nonce,
+                           const u32 ctx1_iv_off)
+{
+       /* Note: Context registers are saved. */
+       init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce);
+
+       /* Class 2 operation */
+       append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Read and write assoclen bytes */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* read assoc before reading payload */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+                                     FIFOLDST_VLF);
+
+       /* Load Counter into CONTEXT1 reg */
+       if (is_rfc3686)
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
+
+       /* Class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Read and write cryptlen bytes */
+       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2);
+
+       /* Write ICV */
+       append_seq_store(desc, icvsize, LDST_CLASS_2_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR, "aead enc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_aead_encap);
+
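A hedged caller sketch (the budget constant and helper name are illustrative, and real callers may decide inlining per key rather than for both at once): pick whether the keys fit inline, then construct the encap descriptor:

    static void example_aead_enc_shdesc(u32 *sh_desc, struct alginfo *cdata,
                                        struct alginfo *adata,
                                        unsigned int authsize)
    {
            /* Assumed inlining budget: keep the shared descriptor small
             * enough when both keys are carried as immediates. */
            const unsigned int budget = 64 * CAAM_CMD_SZ;

            adata->key_inline = cdata->key_inline =
                    DESC_AEAD_ENC_LEN + adata->keylen_pad + cdata->keylen <=
                    budget;

            cnstr_shdsc_aead_encap(sh_desc, cdata, adata, authsize,
                                   false /* !rfc3686 */, NULL /* nonce */,
                                   0 /* ctx1_iv_off */);
    }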
+/**
+ * cnstr_shdsc_aead_decap - IPSec ESP decapsulation shared descriptor
+ *                          (non-protocol).
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
+ *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
+ * @adata: pointer to authentication transform definitions. Note that since a
+ *         split key is to be used, the size of the split key itself is
+ *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
+ *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @ivsize: initialization vector size
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
+ * @nonce: pointer to rfc3686 nonce
+ * @ctx1_iv_off: IV offset in CONTEXT1 register
+ *
+ * Note: Requires an MDHA split key.
+ */
+void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata,
+                           struct alginfo *adata, unsigned int ivsize,
+                           unsigned int icvsize, const bool geniv,
+                           const bool is_rfc3686, u32 *nonce,
+                           const u32 ctx1_iv_off)
+{
+       /* Note: Context registers are saved. */
+       init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce);
+
+       /* Class 2 operation */
+       append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+       /* Read and write assoclen bytes */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       if (geniv)
+               append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize);
+       else
+               append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* read assoc before reading payload */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+                            KEY_VLF);
+
+       if (geniv) {
+               append_seq_load(desc, ivsize, LDST_CLASS_1_CCB |
+                               LDST_SRCDST_BYTE_CONTEXT |
+                               (ctx1_iv_off << LDST_OFFSET_SHIFT));
+               append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO |
+                           (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize);
+       }
+
+       /* Load Counter into CONTEXT1 reg */
+       if (is_rfc3686)
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
+
+       /* Choose operation */
+       if (ctx1_iv_off)
+               append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                                OP_ALG_DECRYPT);
+       else
+               append_dec_op1(desc, cdata->algtype);
+
+       /* Read and write cryptlen bytes */
+       append_math_add(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+       aead_append_src_dst(desc, FIFOLD_TYPE_MSG);
+
+       /* Load ICV */
+       append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 |
+                            FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR, "aead dec shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_aead_decap);
+
+/**
+ * cnstr_shdsc_aead_givencap - IPSec ESP encapsulation shared descriptor
+ *                             (non-protocol) with HW-generated initialization
+ *                             vector.
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
+ *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
+ * @adata: pointer to authentication transform definitions. Note that since a
+ *         split key is to be used, the size of the split key itself is
+ *         specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1,
+ *         SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP.
+ * @ivsize: initialization vector size
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
+ * @nonce: pointer to rfc3686 nonce
+ * @ctx1_iv_off: IV offset in CONTEXT1 register
+ *
+ * Note: Requires an MDHA split key.
+ */
+void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
+                              struct alginfo *adata, unsigned int ivsize,
+                              unsigned int icvsize, const bool is_rfc3686,
+                              u32 *nonce, const u32 ctx1_iv_off)
+{
+       u32 geniv, moveiv;
+
+       /* Note: Context registers are saved. */
+       init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce);
+
+       if (is_rfc3686)
+               goto copy_iv;
+
+       /* Generate IV */
+       geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
+               NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
+               NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT);
+       append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
+                           LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
+       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+       append_move(desc, MOVE_WAITCOMP |
+                   MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX |
+                   (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
+                   (ivsize << MOVE_LEN_SHIFT));
+       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+
+copy_iv:
+       /* Copy IV from class 1 context to OFIFO */
+       append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO |
+                   (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
+                   (ivsize << MOVE_LEN_SHIFT));
+
+       /* Return to encryption */
+       append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Read and write assoclen bytes */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* read assoc before reading payload */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
+                            KEY_VLF);
+
+       /* Copy iv from outfifo to class 2 fifo */
+       moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 |
+                NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT);
+       append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB |
+                           LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
+       append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB |
+                           LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM);
+
+       /* Load Counter into CONTEXT1 reg */
+       if (is_rfc3686)
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
+
+       /* Class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Will write ivsize + cryptlen */
+       append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* No need to reload the IV */
+       append_seq_fifo_load(desc, ivsize,
+                            FIFOLD_CLASS_SKIP);
+
+       /* Will read cryptlen */
+       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF |
+                            FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH);
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
+
+       /* Write ICV */
+       append_seq_store(desc, icvsize, LDST_CLASS_2_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "aead givenc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_aead_givencap);
+
+/**
+ * cnstr_shdsc_gcm_encap - gcm encapsulation shared descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ */
+void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
+                          unsigned int icvsize)
+{
+       u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1,
+           *zero_assoc_jump_cmd2;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* skip key loading if it is loaded due to sharing */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD | JUMP_COND_SELF);
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                                 cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* if assoclen + cryptlen is ZERO, skip to ICV write */
+       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
+                                                JUMP_COND_MATH_Z);
+
+       /* if assoclen is ZERO, skip reading the assoc data */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
+                                          JUMP_COND_MATH_Z);
+
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* cryptlen = seqinlen - assoclen */
+       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
+
+       /* if cryptlen is ZERO jump to zero-payload commands */
+       zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
+                                           JUMP_COND_MATH_Z);
+
+       /* read assoc data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
+       set_jump_tgt_here(desc, zero_assoc_jump_cmd1);
+
+       append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* write encrypted data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
+
+       /* read payload data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
+
+       /* jump over the zero-payload commands */
+       append_jump(desc, JUMP_TEST_ALL | 2);
+
+       /* zero-payload commands */
+       set_jump_tgt_here(desc, zero_payload_jump_cmd);
+
+       /* read assoc data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
+
+       /* There is no input data */
+       set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
+
+       /* write ICV */
+       append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR, "gcm enc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_gcm_encap);
+
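A minimal usage sketch for the GCM pair (helper name and the authsize parameter are illustrative); the key is inlined or referenced by address according to cdata->key_inline, exactly as in the code above:

    static void example_gcm_enc_shdesc(u32 *sh_desc, struct alginfo *cdata,
                                       unsigned int authsize)
    {
            cdata->algtype = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_AES |
                             OP_ALG_AAI_GCM;
            cnstr_shdsc_gcm_encap(sh_desc, cdata, authsize);
    }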
+/**
+ * cnstr_shdsc_gcm_decap - gcm decapsulation shared descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ */
+void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
+                          unsigned int icvsize)
+{
+       u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* skip key loading if it is loaded due to sharing */
+       key_jump_cmd = append_jump(desc, JUMP_JSL |
+                                  JUMP_TEST_ALL | JUMP_COND_SHRD |
+                                  JUMP_COND_SELF);
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                                 cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+       /* if assoclen is ZERO, skip reading the assoc data */
+       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL |
+                                                JUMP_COND_MATH_Z);
+
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* read assoc data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
+
+       set_jump_tgt_here(desc, zero_assoc_jump_cmd1);
+
+       /* cryptlen = seqoutlen - assoclen */
+       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /* jump to zero-payload command if cryptlen is zero */
+       zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
+                                           JUMP_COND_MATH_Z);
+
+       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /* store encrypted data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
+
+       /* read payload data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
+
+       /* zero-payload command */
+       set_jump_tgt_here(desc, zero_payload_jump_cmd);
+
+       /* read ICV */
+       append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 |
+                            FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR, "gcm dec shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_gcm_decap);
+
+/**
+ * cnstr_shdsc_rfc4106_encap - IPSec ESP gcm encapsulation shared descriptor
+ *                             (non-protocol).
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ */
+void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize)
+{
+       u32 *key_jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* Skip key loading if it is loaded due to sharing */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                                 cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* Read assoc data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
+
+       /* Skip IV */
+       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+       /* Will read cryptlen bytes */
+       append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* cryptlen = seqoutlen - assoclen */
+       append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* Write encrypted data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
+
+       /* Read payload data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
+
+       /* Write ICV */
+       append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "rfc4106 enc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap);
+
+/**
+ * cnstr_shdsc_rfc4106_decap - IPSec ESP gcm decapsulation shared descriptor
+ *                             (non-protocol).
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ */
+void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize)
+{
+       u32 *key_jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* Skip key loading if it is loaded due to sharing */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                                 cdata->keylen, CLASS_1 |
+                                 KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+       append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
+       append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+
+       /* Read assoc data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
+
+       /* Skip IV */
+       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+       /* Will read cryptlen bytes */
+       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
+
+       /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* Will write cryptlen bytes */
+       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /* Store payload data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
+
+       /* Read encrypted data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+                            FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1);
+
+       /* Read ICV */
+       append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 |
+                            FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "rfc4106 dec shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap);
+
+/**
+ * cnstr_shdsc_rfc4543_encap - IPSec ESP gmac encapsulation shared descriptor
+ *                             (non-protocol).
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ */
+void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize)
+{
+       u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* Skip key loading if it is loaded due to sharing */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                                 cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* assoclen + cryptlen = seqinlen */
+       append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /*
+        * MOVE_LEN opcode is not available in all SEC HW revisions,
+        * thus need to do some magic, i.e. self-patch the descriptor
+        * buffer.
+        */
+       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
+                                   (0x6 << MOVE_LEN_SHIFT));
+       write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
+                                    (0x8 << MOVE_LEN_SHIFT));
+
+       /* Will read assoclen + cryptlen bytes */
+       append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* Will write assoclen + cryptlen bytes */
+       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+       /* Read and write assoclen + cryptlen bytes */
+       aead_append_src_dst(desc, FIFOLD_TYPE_AAD);
+
+       set_move_tgt_here(desc, read_move_cmd);
+       set_move_tgt_here(desc, write_move_cmd);
+       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+       /* Move payload data to OFIFO */
+       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO);
+
+       /* Write ICV */
+       append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "rfc4543 enc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap);
+
+/**
+ * cnstr_shdsc_rfc4543_decap - IPSec ESP gmac decapsulation shared descriptor
+ *                             (non-protocol).
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM.
+ * @icvsize: integrity check value (ICV) size (truncated or full)
+ */
+void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize)
+{
+       u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
+
+       /* Skip key loading if it is loaded due to sharing */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+       if (cdata->key_inline)
+               append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                                 cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+       else
+               append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 |
+                          KEY_DEST_CLASS_REG);
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Class 1 operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_DECRYPT | OP_ALG_ICV_ON);
+
+       /* assoclen + cryptlen = seqoutlen */
+       append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /*
+        * MOVE_LEN opcode is not available in all SEC HW revisions,
+        * thus need to do some magic, i.e. self-patch the descriptor
+        * buffer.
+        */
+       read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 |
+                                   (0x6 << MOVE_LEN_SHIFT));
+       write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF |
+                                    (0x8 << MOVE_LEN_SHIFT));
+
+       /* Will read assoclen + cryptlen bytes */
+       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /* Will write assoclen + cryptlen bytes */
+       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+
+       /* Store payload data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
+
+       /* In-snoop assoclen + cryptlen data */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF |
+                            FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1);
+
+       set_move_tgt_here(desc, read_move_cmd);
+       set_move_tgt_here(desc, write_move_cmd);
+       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+       /* Move payload data to OFIFO */
+       append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO);
+       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+
+       /* Read ICV */
+       append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 |
+                            FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "rfc4543 dec shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_rfc4543_decap);
+
+/*
+ * For ablkcipher encrypt and decrypt, read from req->src and
+ * write to req->dst
+ */
+static inline void ablkcipher_append_src_dst(u32 *desc)
+{
+       append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 |
+                            KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
+}
+
+/**
+ * cnstr_shdsc_ablkcipher_encap - ablkcipher encapsulation shared descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
+ *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
+ * @ivsize: initialization vector size
+ * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
+ * @ctx1_iv_off: IV offset in CONTEXT1 register
+ */
+void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
+                                 unsigned int ivsize, const bool is_rfc3686,
+                                 const u32 ctx1_iv_off)
+{
+       u32 *key_jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+
+       /* Load class1 key only */
+       append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                         cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+       /* Load nonce into CONTEXT1 reg */
+       if (is_rfc3686) {
+               u8 *nonce = cdata->key_virt + cdata->keylen;
+
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO |
+                           MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) |
+                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
+       }
+
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Load iv */
+       append_seq_load(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
+                       LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT));
+
+       /* Load counter into CONTEXT1 reg */
+       if (is_rfc3686)
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
+
+       /* Load operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Perform operation */
+       ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "ablkcipher enc shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_encap);
+
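A minimal usage sketch for plain cbc(aes) (illustrative names; 16 is the AES block/IV size, and the IV sits at offset 0 of CONTEXT1 because no RFC3686 nonce precedes it):

    static void example_cbc_aes_enc_shdesc(u32 *sh_desc, struct alginfo *cdata)
    {
            cdata->algtype = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_AES |
                             OP_ALG_AAI_CBC;
            cnstr_shdsc_ablkcipher_encap(sh_desc, cdata, 16 /* ivsize */,
                                         false /* !rfc3686 */,
                                         0 /* ctx1_iv_off */);
    }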
+/**
+ * cnstr_shdsc_ablkcipher_decap - ablkcipher decapsulation shared descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
+ *         with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128.
+ * @ivsize: initialization vector size
+ * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
+ * @ctx1_iv_off: IV offset in CONTEXT1 register
+ */
+void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata,
+                                 unsigned int ivsize, const bool is_rfc3686,
+                                 const u32 ctx1_iv_off)
+{
+       u32 *key_jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+
+       /* Load class1 key only */
+       append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                         cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+       /* Load nonce into CONTEXT1 reg */
+       if (is_rfc3686) {
+               u8 *nonce = cdata->key_virt + cdata->keylen;
+
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO |
+                           MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) |
+                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
+       }
+
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* load IV */
+       append_seq_load(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
+                       LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT));
+
+       /* Load counter into CONTEXT1 reg */
+       if (is_rfc3686)
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
+
+       /* Choose operation */
+       if (ctx1_iv_off)
+               append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                                OP_ALG_DECRYPT);
+       else
+               append_dec_op1(desc, cdata->algtype);
+
+       /* Perform operation */
+       ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "ablkcipher dec shdesc@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_decap);
+
+/**
+ * cnstr_shdsc_ablkcipher_givencap - ablkcipher encapsulation shared descriptor
+ *                                   with HW-generated initialization vector.
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed
+ *         with OP_ALG_AAI_CBC.
+ * @ivsize: initialization vector size
+ * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template
+ * @ctx1_iv_off: IV offset in CONTEXT1 register
+ */
+void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata,
+                                    unsigned int ivsize, const bool is_rfc3686,
+                                    const u32 ctx1_iv_off)
+{
+       u32 *key_jump_cmd, geniv;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+
+       /* Load class1 key only */
+       append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                         cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+       /* Load Nonce into CONTEXT1 reg */
+       if (is_rfc3686) {
+               u8 *nonce = cdata->key_virt + cdata->keylen;
+
+               append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE,
+                                  LDST_CLASS_IND_CCB |
+                                  LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM);
+               append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO |
+                           MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) |
+                           (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT));
+       }
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /* Generate IV */
+       geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
+               NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | NFIFOENTRY_PTYPE_RND |
+               (ivsize << NFIFOENTRY_DLEN_SHIFT);
+       append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
+                           LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
+       append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
+       append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO |
+                   MOVE_DEST_CLASS1CTX | (ivsize << MOVE_LEN_SHIFT) |
+                   (ctx1_iv_off << MOVE_OFFSET_SHIFT));
+       append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
+
+       /* Copy generated IV to memory */
+       append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT |
+                        LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT));
+
+       /* Load Counter into CONTEXT1 reg */
+       if (is_rfc3686)
+               append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB |
+                                    LDST_SRCDST_BYTE_CONTEXT |
+                                    ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) <<
+                                     LDST_OFFSET_SHIFT));
+
+       if (ctx1_iv_off)
+               append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP |
+                           (1 << JUMP_OFFSET_SHIFT));
+
+       /* Load operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Perform operation */
+       ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_givencap);
+
+/**
+ * cnstr_shdsc_xts_ablkcipher_encap - xts ablkcipher encapsulation shared
+ *                                    descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS.
+ */
+void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata)
+{
+       __be64 sector_size = cpu_to_be64(512);
+       u32 *key_jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+
+       /* Load class1 keys only */
+       append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                         cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+       /* Load sector size with index 40 bytes (0x28) */
+       append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB |
+                          LDST_SRCDST_BYTE_CONTEXT |
+                          (0x28 << LDST_OFFSET_SHIFT));
+
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /*
+        * create sequence for loading the sector index
+        * Upper 8B of IV - will be used as sector index
+        * Lower 8B of IV - will be discarded
+        */
+       append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+                       (0x20 << LDST_OFFSET_SHIFT));
+       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+       /* Load operation */
+       append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+                        OP_ALG_ENCRYPT);
+
+       /* Perform operation */
+       ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_encap);
+
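Usage sketch for XTS (illustrative names): the descriptor above hard-codes a 512-byte sector size and takes the sector index from the upper 8 bytes of the IV, so the caller only supplies the combined two-half key:

    static void example_xts_aes_enc_shdesc(u32 *sh_desc, struct alginfo *cdata)
    {
            /* cdata->key_virt/keylen are assumed to cover both XTS key halves */
            cdata->algtype = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_AES |
                             OP_ALG_AAI_XTS;
            cnstr_shdsc_xts_ablkcipher_encap(sh_desc, cdata);
    }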
+/**
+ * cnstr_shdsc_xts_ablkcipher_decap - xts ablkcipher decapsulation shared
+ *                                    descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ *         Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS.
+ */
+void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata)
+{
+       __be64 sector_size = cpu_to_be64(512);
+       u32 *key_jump_cmd;
+
+       init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+       /* Skip if already shared */
+       key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                  JUMP_COND_SHRD);
+
+       /* Load class1 key only */
+       append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+                         cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+       /* Load sector size with index 40 bytes (0x28) */
+       append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB |
+                          LDST_SRCDST_BYTE_CONTEXT |
+                          (0x28 << LDST_OFFSET_SHIFT));
+
+       set_jump_tgt_here(desc, key_jump_cmd);
+
+       /*
+        * create sequence for loading the sector index
+        * Upper 8B of IV - will be used as sector index
+        * Lower 8B of IV - will be discarded
+        */
+       append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+                       (0x20 << LDST_OFFSET_SHIFT));
+       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+       /* Load operation */
+       append_dec_op1(desc, cdata->algtype);
+
+       /* Perform operation */
+       ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+       print_hex_dump(KERN_ERR,
+                      "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_decap);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM descriptor support");
+MODULE_AUTHOR("Freescale Semiconductor - NMG/STC");
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
new file mode 100644
index 0000000..9555173
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Shared descriptors for aead, ablkcipher algorithms
+ *
+ * Copyright 2016 NXP
+ */
+
+#ifndef _CAAMALG_DESC_H_
+#define _CAAMALG_DESC_H_
+
+/* length of descriptors text */
+#define DESC_AEAD_BASE                 (4 * CAAM_CMD_SZ)
+#define DESC_AEAD_ENC_LEN              (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ)
+#define DESC_AEAD_DEC_LEN              (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
+#define DESC_AEAD_GIVENC_LEN           (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ)
+
+/* Note: Nonce is counted in cdata.keylen */
+#define DESC_AEAD_CTR_RFC3686_LEN      (4 * CAAM_CMD_SZ)
+
+#define DESC_AEAD_NULL_BASE            (3 * CAAM_CMD_SZ)
+#define DESC_AEAD_NULL_ENC_LEN         (DESC_AEAD_NULL_BASE + 11 * CAAM_CMD_SZ)
+#define DESC_AEAD_NULL_DEC_LEN         (DESC_AEAD_NULL_BASE + 13 * CAAM_CMD_SZ)
+
+#define DESC_GCM_BASE                  (3 * CAAM_CMD_SZ)
+#define DESC_GCM_ENC_LEN               (DESC_GCM_BASE + 16 * CAAM_CMD_SZ)
+#define DESC_GCM_DEC_LEN               (DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
+
+#define DESC_RFC4106_BASE              (3 * CAAM_CMD_SZ)
+#define DESC_RFC4106_ENC_LEN           (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+#define DESC_RFC4106_DEC_LEN           (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+
+#define DESC_RFC4543_BASE              (3 * CAAM_CMD_SZ)
+#define DESC_RFC4543_ENC_LEN           (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
+#define DESC_RFC4543_DEC_LEN           (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ)
+
+#define DESC_ABLKCIPHER_BASE           (3 * CAAM_CMD_SZ)
+#define DESC_ABLKCIPHER_ENC_LEN                (DESC_ABLKCIPHER_BASE + \
+                                        20 * CAAM_CMD_SZ)
+#define DESC_ABLKCIPHER_DEC_LEN                (DESC_ABLKCIPHER_BASE + \
+                                        15 * CAAM_CMD_SZ)
+
+void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata,
+                                unsigned int icvsize);
+
+void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata,
+                                unsigned int icvsize);
+
+void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata,
+                           struct alginfo *adata, unsigned int icvsize,
+                           const bool is_rfc3686, u32 *nonce,
+                           const u32 ctx1_iv_off);
+
+void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata,
+                           struct alginfo *adata, unsigned int ivsize,
+                           unsigned int icvsize, const bool geniv,
+                           const bool is_rfc3686, u32 *nonce,
+                           const u32 ctx1_iv_off);
+
+void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata,
+                              struct alginfo *adata, unsigned int ivsize,
+                              unsigned int icvsize, const bool is_rfc3686,
+                              u32 *nonce, const u32 ctx1_iv_off);
+
+void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata,
+                          unsigned int icvsize);
+
+void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata,
+                          unsigned int icvsize);
+
+void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize);
+
+void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize);
+
+void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize);
+
+void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata,
+                              unsigned int icvsize);
+
+void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata,
+                                 unsigned int ivsize, const bool is_rfc3686,
+                                 const u32 ctx1_iv_off);
+
+void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata,
+                                 unsigned int ivsize, const bool is_rfc3686,
+                                 const u32 ctx1_iv_off);
+
+void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata,
+                                    unsigned int ivsize, const bool is_rfc3686,
+                                    const u32 ctx1_iv_off);
+
+void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata);
+
+void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata);
+
+#endif /* _CAAMALG_DESC_H_ */
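
For illustration, a minimal sketch of how a caller might combine one of the constructors declared above with desc_bytes() and the DMA mapping API; xts_dec_shdesc_setup() and its argument layout are hypothetical and not part of this patch.

static int xts_dec_shdesc_setup(struct device *jrdev, u32 *desc,
                                struct alginfo *cdata, dma_addr_t *shdesc_dma)
{
        /* Build the shared descriptor into the caller-provided buffer */
        cnstr_shdsc_xts_ablkcipher_decap(desc, cdata);

        /* Hand the finished descriptor to the CAAM job ring hardware */
        *shdesc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
                                     DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, *shdesc_dma))
                return -ENOMEM;

        return 0;
}
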
index 660dc20..e58639e 100644 (file)
@@ -72,7 +72,7 @@
 #define CAAM_MAX_HASH_DIGEST_SIZE      SHA512_DIGEST_SIZE
 
 /* length of descriptors text */
-#define DESC_AHASH_BASE                        (4 * CAAM_CMD_SZ)
+#define DESC_AHASH_BASE                        (3 * CAAM_CMD_SZ)
 #define DESC_AHASH_UPDATE_LEN          (6 * CAAM_CMD_SZ)
 #define DESC_AHASH_UPDATE_FIRST_LEN    (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ)
 #define DESC_AHASH_FINAL_LEN           (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ)
@@ -103,20 +103,15 @@ struct caam_hash_ctx {
        u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
        u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
        u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
-       u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned;
        dma_addr_t sh_desc_update_dma ____cacheline_aligned;
        dma_addr_t sh_desc_update_first_dma;
        dma_addr_t sh_desc_fin_dma;
        dma_addr_t sh_desc_digest_dma;
-       dma_addr_t sh_desc_finup_dma;
        struct device *jrdev;
-       u32 alg_type;
-       u32 alg_op;
        u8 key[CAAM_MAX_HASH_KEY_SIZE];
        dma_addr_t key_dma;
        int ctx_len;
-       unsigned int split_key_len;
-       unsigned int split_key_pad_len;
+       struct alginfo adata;
 };
 
 /* ahash state */
@@ -222,89 +217,54 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev,
        return 0;
 }
 
-/* Common shared descriptor commands */
-static inline void append_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
-{
-       append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len,
-                         ctx->split_key_len, CLASS_2 |
-                         KEY_DEST_MDHA_SPLIT | KEY_ENC);
-}
-
-/* Append key if it has been set */
-static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
-{
-       u32 *key_jump_cmd;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       if (ctx->split_key_len) {
-               /* Skip if already shared */
-               key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-                                          JUMP_COND_SHRD);
-
-               append_key_ahash(desc, ctx);
-
-               set_jump_tgt_here(desc, key_jump_cmd);
-       }
-
-       /* Propagate errors from shared to job descriptor */
-       append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-}
-
 /*
- * For ahash read data from seqin following state->caam_ctx,
- * and write resulting class2 context to seqout, which may be state->caam_ctx
- * or req->result
+ * For ahash update, final and finup (import_ctx = true)
+ *     import context, read and write to seqout
+ * For ahash update_first and digest (import_ctx = false)
+ *     read and write to seqout
  */
-static inline void ahash_append_load_str(u32 *desc, int digestsize)
+static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize,
+                                    struct caam_hash_ctx *ctx, bool import_ctx)
 {
-       /* Calculate remaining bytes to read */
-       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-       /* Read remaining bytes */
-       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
-                            FIFOLD_TYPE_MSG | KEY_VLF);
+       u32 op = ctx->adata.algtype;
+       u32 *skip_key_load;
 
-       /* Store class2 context bytes */
-       append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
-                        LDST_SRCDST_BYTE_CONTEXT);
-}
+       init_sh_desc(desc, HDR_SHARE_SERIAL);
 
-/*
- * For ahash update, final and finup, import context, read and write to seqout
- */
-static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state,
-                                        int digestsize,
-                                        struct caam_hash_ctx *ctx)
-{
-       init_sh_desc_key_ahash(desc, ctx);
+       /* Append key if it has been set; ahash update excluded */
+       if ((state != OP_ALG_AS_UPDATE) && (ctx->adata.keylen)) {
+               /* Skip key loading if already shared */
+               skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+                                           JUMP_COND_SHRD);
 
-       /* Import context from software */
-       append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-                  LDST_CLASS_2_CCB | ctx->ctx_len);
+               append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad,
+                                 ctx->adata.keylen, CLASS_2 |
+                                 KEY_DEST_MDHA_SPLIT | KEY_ENC);
 
-       /* Class 2 operation */
-       append_operation(desc, op | state | OP_ALG_ENCRYPT);
+               set_jump_tgt_here(desc, skip_key_load);
 
-       /*
-        * Load from buf and/or src and write to req->result or state->context
-        */
-       ahash_append_load_str(desc, digestsize);
-}
+               op |= OP_ALG_AAI_HMAC_PRECOMP;
+       }
 
-/* For ahash firsts and digest, read and write to seqout */
-static inline void ahash_data_to_out(u32 *desc, u32 op, u32 state,
-                                    int digestsize, struct caam_hash_ctx *ctx)
-{
-       init_sh_desc_key_ahash(desc, ctx);
+       /* If needed, import context from software */
+       if (import_ctx)
+               append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
+                               LDST_SRCDST_BYTE_CONTEXT);
 
        /* Class 2 operation */
        append_operation(desc, op | state | OP_ALG_ENCRYPT);
 
        /*
         * Load from buf and/or src and write to req->result or state->context
+        * Calculate remaining bytes to read
         */
-       ahash_append_load_str(desc, digestsize);
+       append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       /* Read remaining bytes */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
+                            FIFOLD_TYPE_MSG | KEY_VLF);
+       /* Store class2 context bytes */
+       append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+                        LDST_SRCDST_BYTE_CONTEXT);
 }
 
 static int ahash_set_sh_desc(struct crypto_ahash *ahash)
@@ -312,28 +272,11 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
        struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
        int digestsize = crypto_ahash_digestsize(ahash);
        struct device *jrdev = ctx->jrdev;
-       u32 have_key = 0;
        u32 *desc;
 
-       if (ctx->split_key_len)
-               have_key = OP_ALG_AAI_HMAC_PRECOMP;
-
        /* ahash_update shared descriptor */
        desc = ctx->sh_desc_update;
-
-       init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-       /* Import context from software */
-       append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-                  LDST_CLASS_2_CCB | ctx->ctx_len);
-
-       /* Class 2 operation */
-       append_operation(desc, ctx->alg_type | OP_ALG_AS_UPDATE |
-                        OP_ALG_ENCRYPT);
-
-       /* Load data and write to result or context */
-       ahash_append_load_str(desc, ctx->ctx_len);
-
+       ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true);
        ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
                                                 DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) {
@@ -348,10 +291,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
        /* ahash_update_first shared descriptor */
        desc = ctx->sh_desc_update_first;
-
-       ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT,
-                         ctx->ctx_len, ctx);
-
+       ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false);
        ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc,
                                                       desc_bytes(desc),
                                                       DMA_TO_DEVICE);
@@ -367,10 +307,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
        /* ahash_final shared descriptor */
        desc = ctx->sh_desc_fin;
-
-       ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
-                             OP_ALG_AS_FINALIZE, digestsize, ctx);
-
+       ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true);
        ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
                                              DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) {
@@ -383,30 +320,9 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
                       desc_bytes(desc), 1);
 #endif
 
-       /* ahash_finup shared descriptor */
-       desc = ctx->sh_desc_finup;
-
-       ahash_ctx_data_to_out(desc, have_key | ctx->alg_type,
-                             OP_ALG_AS_FINALIZE, digestsize, ctx);
-
-       ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
-                                               DMA_TO_DEVICE);
-       if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) {
-               dev_err(jrdev, "unable to map shared descriptor\n");
-               return -ENOMEM;
-       }
-#ifdef DEBUG
-       print_hex_dump(KERN_ERR, "ahash finup shdesc@"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, desc,
-                      desc_bytes(desc), 1);
-#endif
-
        /* ahash_digest shared descriptor */
        desc = ctx->sh_desc_digest;
-
-       ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INITFINAL,
-                         digestsize, ctx);
-
+       ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false);
        ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc,
                                                 desc_bytes(desc),
                                                 DMA_TO_DEVICE);
@@ -424,14 +340,6 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
        return 0;
 }
 
-static int gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in,
-                             u32 keylen)
-{
-       return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len,
-                              ctx->split_key_pad_len, key_in, keylen,
-                              ctx->alg_op);
-}
-
 /* Digest hash size if it is too large */
 static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
                           u32 *keylen, u8 *key_out, u32 digestsize)
@@ -467,7 +375,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
        }
 
        /* Job descriptor to perform unkeyed hash on key_in */
-       append_operation(desc, ctx->alg_type | OP_ALG_ENCRYPT |
+       append_operation(desc, ctx->adata.algtype | OP_ALG_ENCRYPT |
                         OP_ALG_AS_INITFINAL);
        append_seq_in_ptr(desc, src_dma, *keylen, 0);
        append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 |
@@ -511,8 +419,6 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in,
 static int ahash_setkey(struct crypto_ahash *ahash,
                        const u8 *key, unsigned int keylen)
 {
-       /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
-       static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
        struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
        struct device *jrdev = ctx->jrdev;
        int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
@@ -537,23 +443,12 @@ static int ahash_setkey(struct crypto_ahash *ahash,
                key = hashed_key;
        }
 
-       /* Pick class 2 key length from algorithm submask */
-       ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
-                                     OP_ALG_ALGSEL_SHIFT] * 2;
-       ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16);
-
-#ifdef DEBUG
-       printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n",
-              ctx->split_key_len, ctx->split_key_pad_len);
-       print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ",
-                      DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-#endif
-
-       ret = gen_split_hash_key(ctx, key, keylen);
+       ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key, keylen,
+                           CAAM_MAX_HASH_KEY_SIZE);
        if (ret)
                goto bad_free_key;
 
-       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len,
+       ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->adata.keylen_pad,
                                      DMA_TO_DEVICE);
        if (dma_mapping_error(jrdev, ctx->key_dma)) {
                dev_err(jrdev, "unable to map key i/o memory\n");
@@ -563,14 +458,15 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 #ifdef DEBUG
        print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
                       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
-                      ctx->split_key_pad_len, 1);
+                      ctx->adata.keylen_pad, 1);
 #endif
 
        ret = ahash_set_sh_desc(ahash);
        if (ret) {
-               dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len,
+               dma_unmap_single(jrdev, ctx->key_dma, ctx->adata.keylen_pad,
                                 DMA_TO_DEVICE);
        }
+
  error_free_key:
        kfree(hashed_key);
        return ret;
@@ -639,8 +535,7 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err,
        dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-       edesc = (struct ahash_edesc *)((char *)desc -
-                offsetof(struct ahash_edesc, hw_desc));
+       edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
        if (err)
                caam_jr_strstatus(jrdev, err);
 
@@ -674,8 +569,7 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
        dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-       edesc = (struct ahash_edesc *)((char *)desc -
-                offsetof(struct ahash_edesc, hw_desc));
+       edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
        if (err)
                caam_jr_strstatus(jrdev, err);
 
@@ -709,8 +603,7 @@ static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
        dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-       edesc = (struct ahash_edesc *)((char *)desc -
-                offsetof(struct ahash_edesc, hw_desc));
+       edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
        if (err)
                caam_jr_strstatus(jrdev, err);
 
@@ -744,8 +637,7 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
        dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 #endif
 
-       edesc = (struct ahash_edesc *)((char *)desc -
-                offsetof(struct ahash_edesc, hw_desc));
+       edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
        if (err)
                caam_jr_strstatus(jrdev, err);
 
@@ -1078,7 +970,7 @@ static int ahash_finup_ctx(struct ahash_request *req)
 
        /* allocate space for base edesc and hw desc commands, link tables */
        edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents,
-                                 ctx->sh_desc_finup, ctx->sh_desc_finup_dma,
+                                 ctx->sh_desc_fin, ctx->sh_desc_fin_dma,
                                  flags);
        if (!edesc) {
                dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE);
@@ -1683,7 +1575,6 @@ struct caam_hash_template {
        unsigned int blocksize;
        struct ahash_alg template_ahash;
        u32 alg_type;
-       u32 alg_op;
 };
 
 /* ahash descriptors */
@@ -1709,7 +1600,6 @@ static struct caam_hash_template driver_hash[] = {
                        },
                },
                .alg_type = OP_ALG_ALGSEL_SHA1,
-               .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC,
        }, {
                .name = "sha224",
                .driver_name = "sha224-caam",
@@ -1731,7 +1621,6 @@ static struct caam_hash_template driver_hash[] = {
                        },
                },
                .alg_type = OP_ALG_ALGSEL_SHA224,
-               .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC,
        }, {
                .name = "sha256",
                .driver_name = "sha256-caam",
@@ -1753,7 +1642,6 @@ static struct caam_hash_template driver_hash[] = {
                        },
                },
                .alg_type = OP_ALG_ALGSEL_SHA256,
-               .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC,
        }, {
                .name = "sha384",
                .driver_name = "sha384-caam",
@@ -1775,7 +1663,6 @@ static struct caam_hash_template driver_hash[] = {
                        },
                },
                .alg_type = OP_ALG_ALGSEL_SHA384,
-               .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC,
        }, {
                .name = "sha512",
                .driver_name = "sha512-caam",
@@ -1797,7 +1684,6 @@ static struct caam_hash_template driver_hash[] = {
                        },
                },
                .alg_type = OP_ALG_ALGSEL_SHA512,
-               .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC,
        }, {
                .name = "md5",
                .driver_name = "md5-caam",
@@ -1819,14 +1705,12 @@ static struct caam_hash_template driver_hash[] = {
                        },
                },
                .alg_type = OP_ALG_ALGSEL_MD5,
-               .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC,
        },
 };
 
 struct caam_hash_alg {
        struct list_head entry;
        int alg_type;
-       int alg_op;
        struct ahash_alg ahash_alg;
 };
 
@@ -1859,10 +1743,10 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
                return PTR_ERR(ctx->jrdev);
        }
        /* copy descriptor header template value */
-       ctx->alg_type = OP_TYPE_CLASS2_ALG | caam_hash->alg_type;
-       ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_hash->alg_op;
+       ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type;
 
-       ctx->ctx_len = runninglen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >>
+       ctx->ctx_len = runninglen[(ctx->adata.algtype &
+                                  OP_ALG_ALGSEL_SUBMASK) >>
                                  OP_ALG_ALGSEL_SHIFT];
 
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
@@ -1893,10 +1777,6 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm)
                dma_unmap_single(ctx->jrdev, ctx->sh_desc_digest_dma,
                                 desc_bytes(ctx->sh_desc_digest),
                                 DMA_TO_DEVICE);
-       if (ctx->sh_desc_finup_dma &&
-           !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma))
-               dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma,
-                                desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE);
 
        caam_jr_free(ctx->jrdev);
 }
@@ -1956,7 +1836,6 @@ caam_hash_alloc(struct caam_hash_template *template,
        alg->cra_type = &crypto_ahash_type;
 
        t_alg->alg_type = template->alg_type;
-       t_alg->alg_op = template->alg_op;
 
        return t_alg;
 }
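
The ahash completion callbacks earlier in this file now recover the enclosing ahash_edesc with container_of() rather than open-coded offsetof() arithmetic. A self-contained sketch of the equivalence, with example_edesc standing in for the real structure (illustrative only):

#include <linux/kernel.h>

struct example_edesc {                 /* stand-in for struct ahash_edesc */
        int src_nents;
        u32 hw_desc[16];
};

static struct example_edesc *edesc_from_desc(u32 *desc)
{
        /*
         * Equivalent to:
         *   (struct example_edesc *)((char *)desc -
         *        offsetof(struct example_edesc, hw_desc));
         * but type-checked against the member's type.
         */
        return container_of(desc, struct example_edesc, hw_desc[0]);
}
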
index 851015e..32100c4 100644 (file)
@@ -395,7 +395,7 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
                                unsigned int keylen)
 {
        struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-       struct rsa_key raw_key = {0};
+       struct rsa_key raw_key = {NULL};
        struct caam_rsa_key *rsa_key = &ctx->key;
        int ret;
 
@@ -441,7 +441,7 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
                                 unsigned int keylen)
 {
        struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-       struct rsa_key raw_key = {0};
+       struct rsa_key raw_key = {NULL};
        struct caam_rsa_key *rsa_key = &ctx->key;
        int ret;
 
index 9b92af2..41398da 100644 (file)
@@ -52,7 +52,7 @@
 
 /* length of descriptors */
 #define DESC_JOB_O_LEN                 (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
-#define DESC_RNG_LEN                   (4 * CAAM_CMD_SZ)
+#define DESC_RNG_LEN                   (3 * CAAM_CMD_SZ)
 
 /* Buffer, its dma address and lock */
 struct buf_data {
@@ -100,8 +100,7 @@ static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
 {
        struct buf_data *bd;
 
-       bd = (struct buf_data *)((char *)desc -
-             offsetof(struct buf_data, hw_desc));
+       bd = container_of(desc, struct buf_data, hw_desc[0]);
 
        if (err)
                caam_jr_strstatus(jrdev, err);
@@ -196,9 +195,6 @@ static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
 
        init_sh_desc(desc, HDR_SHARE_SERIAL);
 
-       /* Propagate errors from shared to job descriptor */
-       append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD);
-
        /* Generate random bytes */
        append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
 
@@ -351,7 +347,7 @@ static int __init caam_rng_init(void)
                pr_err("Job Ring Device allocation for transform failed\n");
                return PTR_ERR(dev);
        }
-       rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA);
+       rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
        if (!rng_ctx) {
                err = -ENOMEM;
                goto free_caam_alloc;
index e483b78..7551098 100644 (file)
@@ -330,8 +330,8 @@ static int caam_remove(struct platform_device *pdev)
        clk_disable_unprepare(ctrlpriv->caam_ipg);
        clk_disable_unprepare(ctrlpriv->caam_mem);
        clk_disable_unprepare(ctrlpriv->caam_aclk);
-       clk_disable_unprepare(ctrlpriv->caam_emi_slow);
-
+       if (ctrlpriv->caam_emi_slow)
+               clk_disable_unprepare(ctrlpriv->caam_emi_slow);
        return 0;
 }
 
@@ -365,11 +365,8 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
         */
        val = (rd_reg32(&r4tst->rtsdctl) & RTSDCTL_ENT_DLY_MASK)
              >> RTSDCTL_ENT_DLY_SHIFT;
-       if (ent_delay <= val) {
-               /* put RNG4 into run mode */
-               clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0);
-               return;
-       }
+       if (ent_delay <= val)
+               goto start_rng;
 
        val = rd_reg32(&r4tst->rtsdctl);
        val = (val & ~RTSDCTL_ENT_DLY_MASK) |
@@ -381,15 +378,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
        wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
        /* read the control register */
        val = rd_reg32(&r4tst->rtmctl);
+start_rng:
        /*
         * select raw sampling in both entropy shifter
-        * and statistical checker
+        * and statistical checker; put RNG4 into run mode
         */
-       clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC);
-       /* put RNG4 into run mode */
-       clrsetbits_32(&val, RTMCTL_PRGM, 0);
-       /* write back the control register */
-       wr_reg32(&r4tst->rtmctl, val);
+       clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
 }
 
 /**
@@ -482,14 +476,16 @@ static int caam_probe(struct platform_device *pdev)
        }
        ctrlpriv->caam_aclk = clk;
 
-       clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
-       if (IS_ERR(clk)) {
-               ret = PTR_ERR(clk);
-               dev_err(&pdev->dev,
-                       "can't identify CAAM emi_slow clk: %d\n", ret);
-               return ret;
+       if (!of_machine_is_compatible("fsl,imx6ul")) {
+               clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
+               if (IS_ERR(clk)) {
+                       ret = PTR_ERR(clk);
+                       dev_err(&pdev->dev,
+                               "can't identify CAAM emi_slow clk: %d\n", ret);
+                       return ret;
+               }
+               ctrlpriv->caam_emi_slow = clk;
        }
-       ctrlpriv->caam_emi_slow = clk;
 
        ret = clk_prepare_enable(ctrlpriv->caam_ipg);
        if (ret < 0) {
@@ -510,11 +506,13 @@ static int caam_probe(struct platform_device *pdev)
                goto disable_caam_mem;
        }
 
-       ret = clk_prepare_enable(ctrlpriv->caam_emi_slow);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n",
-                       ret);
-               goto disable_caam_aclk;
+       if (ctrlpriv->caam_emi_slow) {
+               ret = clk_prepare_enable(ctrlpriv->caam_emi_slow);
+               if (ret < 0) {
+                       dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n",
+                               ret);
+                       goto disable_caam_aclk;
+               }
        }
 
        /* Get configuration properties from device tree */
@@ -541,13 +539,13 @@ static int caam_probe(struct platform_device *pdev)
        else
                BLOCK_OFFSET = PG_SIZE_64K;
 
-       ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl;
-       ctrlpriv->assure = (struct caam_assurance __force *)
-                          ((uint8_t *)ctrl +
+       ctrlpriv->ctrl = (struct caam_ctrl __iomem __force *)ctrl;
+       ctrlpriv->assure = (struct caam_assurance __iomem __force *)
+                          ((__force uint8_t *)ctrl +
                            BLOCK_OFFSET * ASSURE_BLOCK_NUMBER
                           );
-       ctrlpriv->deco = (struct caam_deco __force *)
-                        ((uint8_t *)ctrl +
+       ctrlpriv->deco = (struct caam_deco __iomem __force *)
+                        ((__force uint8_t *)ctrl +
                         BLOCK_OFFSET * DECO_BLOCK_NUMBER
                         );
 
@@ -627,8 +625,8 @@ static int caam_probe(struct platform_device *pdev)
                                        ring);
                                continue;
                        }
-                       ctrlpriv->jr[ring] = (struct caam_job_ring __force *)
-                                            ((uint8_t *)ctrl +
+                       ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *)
+                                            ((__force uint8_t *)ctrl +
                                             (ring + JR_BLOCK_NUMBER) *
                                              BLOCK_OFFSET
                                             );
@@ -641,8 +639,8 @@ static int caam_probe(struct platform_device *pdev)
                        !!(rd_reg32(&ctrl->perfmon.comp_parms_ms) &
                           CTPR_MS_QI_MASK);
        if (ctrlpriv->qi_present) {
-               ctrlpriv->qi = (struct caam_queue_if __force *)
-                              ((uint8_t *)ctrl +
+               ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
+                              ((__force uint8_t *)ctrl +
                                 BLOCK_OFFSET * QI_BLOCK_NUMBER
                               );
                /* This is all that's required to physically enable QI */
@@ -800,7 +798,7 @@ static int caam_probe(struct platform_device *pdev)
                                    &caam_fops_u32_ro);
 
        /* Internal covering keys (useful in non-secure mode only) */
-       ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0];
+       ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0];
        ctrlpriv->ctl_kek_wrap.size = KEK_KEY_SIZE * sizeof(u32);
        ctrlpriv->ctl_kek = debugfs_create_blob("kek",
                                                S_IRUSR |
@@ -808,7 +806,7 @@ static int caam_probe(struct platform_device *pdev)
                                                ctrlpriv->ctl,
                                                &ctrlpriv->ctl_kek_wrap);
 
-       ctrlpriv->ctl_tkek_wrap.data = &ctrlpriv->ctrl->tkek[0];
+       ctrlpriv->ctl_tkek_wrap.data = (__force void *)&ctrlpriv->ctrl->tkek[0];
        ctrlpriv->ctl_tkek_wrap.size = KEK_KEY_SIZE * sizeof(u32);
        ctrlpriv->ctl_tkek = debugfs_create_blob("tkek",
                                                 S_IRUSR |
@@ -816,7 +814,7 @@ static int caam_probe(struct platform_device *pdev)
                                                 ctrlpriv->ctl,
                                                 &ctrlpriv->ctl_tkek_wrap);
 
-       ctrlpriv->ctl_tdsk_wrap.data = &ctrlpriv->ctrl->tdsk[0];
+       ctrlpriv->ctl_tdsk_wrap.data = (__force void *)&ctrlpriv->ctrl->tdsk[0];
        ctrlpriv->ctl_tdsk_wrap.size = KEK_KEY_SIZE * sizeof(u32);
        ctrlpriv->ctl_tdsk = debugfs_create_blob("tdsk",
                                                 S_IRUSR |
@@ -833,7 +831,8 @@ caam_remove:
 iounmap_ctrl:
        iounmap(ctrl);
 disable_caam_emi_slow:
-       clk_disable_unprepare(ctrlpriv->caam_emi_slow);
+       if (ctrlpriv->caam_emi_slow)
+               clk_disable_unprepare(ctrlpriv->caam_emi_slow);
 disable_caam_aclk:
        clk_disable_unprepare(ctrlpriv->caam_aclk);
 disable_caam_mem:
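
The casts in the hunks above only add __iomem/__force annotations for sparse and do not change the generated code. A minimal sketch of the idiom used for the block-offset arithmetic; block_base() is illustrative, not part of the patch.

static void __iomem *block_base(struct caam_ctrl __iomem *ctrl,
                                unsigned int block, size_t block_offset)
{
        /*
         * Cast out of the __iomem address space for the byte arithmetic
         * and back again, marking both conversions __force so sparse
         * treats them as intentional.
         */
        return (void __iomem __force *)((__force u8 *)ctrl +
                                        block * block_offset);
}
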
index 513b664..2e6766a 100644 (file)
 #define SEC4_SG_LEN_MASK       0x3fffffff      /* Excludes EXT and FINAL */
 #define SEC4_SG_OFFSET_MASK    0x00001fff
 
-struct sec4_sg_entry {
-       u64 ptr;
-       u32 len;
-       u32 bpid_offset;
-};
-
 /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */
 #define MAX_CAAM_DESCSIZE      64
 
@@ -90,8 +84,8 @@ struct sec4_sg_entry {
 #define HDR_ZRO                        0x00008000
 
 /* Start Index or SharedDesc Length */
-#define HDR_START_IDX_MASK     0x3f
 #define HDR_START_IDX_SHIFT    16
+#define HDR_START_IDX_MASK     (0x3f << HDR_START_IDX_SHIFT)
 
 /* If shared descriptor header, 6-bit length */
 #define HDR_DESCLEN_SHR_MASK   0x3f
@@ -121,10 +115,10 @@ struct sec4_sg_entry {
 #define HDR_PROP_DNR           0x00000800
 
 /* JobDesc/SharedDesc share property */
-#define HDR_SD_SHARE_MASK      0x03
 #define HDR_SD_SHARE_SHIFT     8
-#define HDR_JD_SHARE_MASK      0x07
+#define HDR_SD_SHARE_MASK      (0x03 << HDR_SD_SHARE_SHIFT)
 #define HDR_JD_SHARE_SHIFT     8
+#define HDR_JD_SHARE_MASK      (0x07 << HDR_JD_SHARE_SHIFT)
 
 #define HDR_SHARE_NEVER                (0x00 << HDR_SD_SHARE_SHIFT)
 #define HDR_SHARE_WAIT         (0x01 << HDR_SD_SHARE_SHIFT)
@@ -235,7 +229,7 @@ struct sec4_sg_entry {
 #define LDST_SRCDST_WORD_DECO_MATH2    (0x0a << LDST_SRCDST_SHIFT)
 #define LDST_SRCDST_WORD_DECO_AAD_SZ   (0x0b << LDST_SRCDST_SHIFT)
 #define LDST_SRCDST_WORD_DECO_MATH3    (0x0b << LDST_SRCDST_SHIFT)
-#define LDST_SRCDST_WORD_CLASS1_ICV_SZ (0x0c << LDST_SRCDST_SHIFT)
+#define LDST_SRCDST_WORD_CLASS1_IV_SZ  (0x0c << LDST_SRCDST_SHIFT)
 #define LDST_SRCDST_WORD_ALTDS_CLASS1  (0x0f << LDST_SRCDST_SHIFT)
 #define LDST_SRCDST_WORD_PKHA_A_SZ     (0x10 << LDST_SRCDST_SHIFT)
 #define LDST_SRCDST_WORD_PKHA_B_SZ     (0x11 << LDST_SRCDST_SHIFT)
@@ -400,7 +394,7 @@ struct sec4_sg_entry {
 #define FIFOST_TYPE_PKHA_N      (0x08 << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_PKHA_A      (0x0c << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_PKHA_B      (0x0d << FIFOST_TYPE_SHIFT)
-#define FIFOST_TYPE_AF_SBOX_JKEK (0x10 << FIFOST_TYPE_SHIFT)
+#define FIFOST_TYPE_AF_SBOX_JKEK (0x20 << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_AF_SBOX_TKEK (0x21 << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_PKHA_E_JKEK         (0x22 << FIFOST_TYPE_SHIFT)
 #define FIFOST_TYPE_PKHA_E_TKEK         (0x23 << FIFOST_TYPE_SHIFT)
@@ -1107,8 +1101,8 @@ struct sec4_sg_entry {
 /* For non-protocol/alg-only op commands */
 #define OP_ALG_TYPE_SHIFT      24
 #define OP_ALG_TYPE_MASK       (0x7 << OP_ALG_TYPE_SHIFT)
-#define OP_ALG_TYPE_CLASS1     2
-#define OP_ALG_TYPE_CLASS2     4
+#define OP_ALG_TYPE_CLASS1     (2 << OP_ALG_TYPE_SHIFT)
+#define OP_ALG_TYPE_CLASS2     (4 << OP_ALG_TYPE_SHIFT)
 
 #define OP_ALG_ALGSEL_SHIFT    16
 #define OP_ALG_ALGSEL_MASK     (0xff << OP_ALG_ALGSEL_SHIFT)
@@ -1249,7 +1243,7 @@ struct sec4_sg_entry {
 #define OP_ALG_PKMODE_MOD_PRIMALITY    0x00f
 
 /* PKHA mode copy-memory functions */
-#define OP_ALG_PKMODE_SRC_REG_SHIFT    13
+#define OP_ALG_PKMODE_SRC_REG_SHIFT    17
 #define OP_ALG_PKMODE_SRC_REG_MASK     (7 << OP_ALG_PKMODE_SRC_REG_SHIFT)
 #define OP_ALG_PKMODE_DST_REG_SHIFT    10
 #define OP_ALG_PKMODE_DST_REG_MASK     (7 << OP_ALG_PKMODE_DST_REG_SHIFT)
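
With the shifts now folded into the header and operation masks above, a field can be tested or extracted with a single AND; a minimal illustrative sketch (hdr_share_type() is hypothetical):

static inline u32 hdr_share_type(u32 hdr)
{
        /* Yields an already-shifted value, e.g. HDR_SHARE_NEVER or HDR_SHARE_WAIT */
        return hdr & HDR_SD_SHARE_MASK;
}
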
index a8cd8a7..b9c8d98 100644 (file)
 
 extern bool caam_little_end;
 
-static inline int desc_len(u32 *desc)
+static inline int desc_len(u32 * const desc)
 {
        return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK;
 }
 
-static inline int desc_bytes(void *desc)
+static inline int desc_bytes(void * const desc)
 {
        return desc_len(desc) * CAAM_CMD_SZ;
 }
 
-static inline u32 *desc_end(u32 *desc)
+static inline u32 *desc_end(u32 * const desc)
 {
        return desc + desc_len(desc);
 }
 
-static inline void *sh_desc_pdb(u32 *desc)
+static inline void *sh_desc_pdb(u32 * const desc)
 {
        return desc + 1;
 }
 
-static inline void init_desc(u32 *desc, u32 options)
+static inline void init_desc(u32 * const desc, u32 options)
 {
        *desc = cpu_to_caam32((options | HDR_ONE) + 1);
 }
 
-static inline void init_sh_desc(u32 *desc, u32 options)
+static inline void init_sh_desc(u32 * const desc, u32 options)
 {
        PRINT_POS;
        init_desc(desc, CMD_SHARED_DESC_HDR | options);
 }
 
-static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
+static inline void init_sh_desc_pdb(u32 * const desc, u32 options,
+                                   size_t pdb_bytes)
 {
        u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
 
@@ -72,19 +73,20 @@ static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
                     options);
 }
 
-static inline void init_job_desc(u32 *desc, u32 options)
+static inline void init_job_desc(u32 * const desc, u32 options)
 {
        init_desc(desc, CMD_DESC_HDR | options);
 }
 
-static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes)
+static inline void init_job_desc_pdb(u32 * const desc, u32 options,
+                                    size_t pdb_bytes)
 {
        u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ;
 
        init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options);
 }
 
-static inline void append_ptr(u32 *desc, dma_addr_t ptr)
+static inline void append_ptr(u32 * const desc, dma_addr_t ptr)
 {
        dma_addr_t *offset = (dma_addr_t *)desc_end(desc);
 
@@ -94,8 +96,8 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr)
                                CAAM_PTR_SZ / CAAM_CMD_SZ);
 }
 
-static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
-                                       u32 options)
+static inline void init_job_desc_shared(u32 * const desc, dma_addr_t ptr,
+                                       int len, u32 options)
 {
        PRINT_POS;
        init_job_desc(desc, HDR_SHARED | options |
@@ -103,7 +105,7 @@ static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len,
        append_ptr(desc, ptr);
 }
 
-static inline void append_data(u32 *desc, void *data, int len)
+static inline void append_data(u32 * const desc, void *data, int len)
 {
        u32 *offset = desc_end(desc);
 
@@ -114,7 +116,7 @@ static inline void append_data(u32 *desc, void *data, int len)
                                (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ);
 }
 
-static inline void append_cmd(u32 *desc, u32 command)
+static inline void append_cmd(u32 * const desc, u32 command)
 {
        u32 *cmd = desc_end(desc);
 
@@ -125,7 +127,7 @@ static inline void append_cmd(u32 *desc, u32 command)
 
 #define append_u32 append_cmd
 
-static inline void append_u64(u32 *desc, u64 data)
+static inline void append_u64(u32 * const desc, u64 data)
 {
        u32 *offset = desc_end(desc);
 
@@ -142,14 +144,14 @@ static inline void append_u64(u32 *desc, u64 data)
 }
 
 /* Write command without affecting header, and return pointer to next word */
-static inline u32 *write_cmd(u32 *desc, u32 command)
+static inline u32 *write_cmd(u32 * const desc, u32 command)
 {
        *desc = cpu_to_caam32(command);
 
        return desc + 1;
 }
 
-static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
+static inline void append_cmd_ptr(u32 * const desc, dma_addr_t ptr, int len,
                                  u32 command)
 {
        append_cmd(desc, command | len);
@@ -157,7 +159,7 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
 }
 
 /* Write length after pointer, rather than inside command */
-static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
+static inline void append_cmd_ptr_extlen(u32 * const desc, dma_addr_t ptr,
                                         unsigned int len, u32 command)
 {
        append_cmd(desc, command);
@@ -166,7 +168,7 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
        append_cmd(desc, len);
 }
 
-static inline void append_cmd_data(u32 *desc, void *data, int len,
+static inline void append_cmd_data(u32 * const desc, void *data, int len,
                                   u32 command)
 {
        append_cmd(desc, command | IMMEDIATE | len);
@@ -174,7 +176,7 @@ static inline void append_cmd_data(u32 *desc, void *data, int len,
 }
 
 #define APPEND_CMD_RET(cmd, op) \
-static inline u32 *append_##cmd(u32 *desc, u32 options) \
+static inline u32 *append_##cmd(u32 * const desc, u32 options) \
 { \
        u32 *cmd = desc_end(desc); \
        PRINT_POS; \
@@ -184,13 +186,13 @@ static inline u32 *append_##cmd(u32 *desc, u32 options) \
 APPEND_CMD_RET(jump, JUMP)
 APPEND_CMD_RET(move, MOVE)
 
-static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd)
+static inline void set_jump_tgt_here(u32 * const desc, u32 *jump_cmd)
 {
        *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) |
                                  (desc_len(desc) - (jump_cmd - desc)));
 }
 
-static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
+static inline void set_move_tgt_here(u32 * const desc, u32 *move_cmd)
 {
        u32 val = caam32_to_cpu(*move_cmd);
 
@@ -200,7 +202,7 @@ static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd)
 }
 
 #define APPEND_CMD(cmd, op) \
-static inline void append_##cmd(u32 *desc, u32 options) \
+static inline void append_##cmd(u32 * const desc, u32 options) \
 { \
        PRINT_POS; \
        append_cmd(desc, CMD_##op | options); \
@@ -208,7 +210,8 @@ static inline void append_##cmd(u32 *desc, u32 options) \
 APPEND_CMD(operation, OPERATION)
 
 #define APPEND_CMD_LEN(cmd, op) \
-static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \
+static inline void append_##cmd(u32 * const desc, unsigned int len, \
+                               u32 options) \
 { \
        PRINT_POS; \
        append_cmd(desc, CMD_##op | len | options); \
@@ -220,8 +223,8 @@ APPEND_CMD_LEN(seq_fifo_load, SEQ_FIFO_LOAD)
 APPEND_CMD_LEN(seq_fifo_store, SEQ_FIFO_STORE)
 
 #define APPEND_CMD_PTR(cmd, op) \
-static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
-                               u32 options) \
+static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \
+                               unsigned int len, u32 options) \
 { \
        PRINT_POS; \
        append_cmd_ptr(desc, ptr, len, CMD_##op | options); \
@@ -231,8 +234,8 @@ APPEND_CMD_PTR(load, LOAD)
 APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
 APPEND_CMD_PTR(fifo_store, FIFO_STORE)
 
-static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
-                               u32 options)
+static inline void append_store(u32 * const desc, dma_addr_t ptr,
+                               unsigned int len, u32 options)
 {
        u32 cmd_src;
 
@@ -249,7 +252,8 @@ static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
 }
 
 #define APPEND_SEQ_PTR_INTLEN(cmd, op) \
-static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
+static inline void append_seq_##cmd##_ptr_intlen(u32 * const desc, \
+                                                dma_addr_t ptr, \
                                                 unsigned int len, \
                                                 u32 options) \
 { \
@@ -263,7 +267,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN)
 APPEND_SEQ_PTR_INTLEN(out, OUT)
 
 #define APPEND_CMD_PTR_TO_IMM(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
                                         unsigned int len, u32 options) \
 { \
        PRINT_POS; \
@@ -273,7 +277,7 @@ APPEND_CMD_PTR_TO_IMM(load, LOAD);
 APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD);
 
 #define APPEND_CMD_PTR_EXTLEN(cmd, op) \
-static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \
+static inline void append_##cmd##_extlen(u32 * const desc, dma_addr_t ptr, \
                                         unsigned int len, u32 options) \
 { \
        PRINT_POS; \
@@ -287,7 +291,7 @@ APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR)
  * the size of its type
  */
 #define APPEND_CMD_PTR_LEN(cmd, op, type) \
-static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \
+static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \
                                type len, u32 options) \
 { \
        PRINT_POS; \
@@ -304,7 +308,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32)
  * from length of immediate data provided, e.g., split keys
  */
 #define APPEND_CMD_PTR_TO_IMM2(cmd, op) \
-static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
+static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \
                                         unsigned int data_len, \
                                         unsigned int len, u32 options) \
 { \
@@ -315,7 +319,7 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \
 APPEND_CMD_PTR_TO_IMM2(key, KEY);
 
 #define APPEND_CMD_RAW_IMM(cmd, op, type) \
-static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \
+static inline void append_##cmd##_imm_##type(u32 * const desc, type immediate, \
                                             u32 options) \
 { \
        PRINT_POS; \
@@ -426,3 +430,64 @@ do { \
        APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data)
 #define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \
        APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data)
+
+/**
+ * struct alginfo - Container for algorithm details
+ * @algtype: algorithm selector; for valid values, see documentation of the
+ *           functions where it is used.
+ * @keylen: length of the provided algorithm key, in bytes
+ * @keylen_pad: padded length of the provided algorithm key, in bytes
+ * @key_dma: dma (bus) address of the key, used when key_inline is false
+ * @key_virt: virtual address of the key, used when key_inline is true
+ * @key_inline: true - key can be inlined in the descriptor; false - key is
+ *              referenced by the descriptor
+ */
+struct alginfo {
+       u32 algtype;
+       unsigned int keylen;
+       unsigned int keylen_pad;
+       union {
+               dma_addr_t key_dma;
+               void *key_virt;
+       };
+       bool key_inline;
+};
+
+/**
+ * desc_inline_query() - Provide indications on which data items can be inlined
+ *                       and which shall be referenced in a shared descriptor.
+ * @sd_base_len: Shared descriptor base length - bytes consumed by the commands,
+ *               excluding the data items to be inlined (or corresponding
+ *               pointer if an item is not inlined). Each cnstr_* function that
+ *               generates descriptors should have a define mentioning
+ *               generates descriptors should have a define specifying the
+ *               corresponding length.
+ *          together with the shared descriptor.
+ * @data_len: Array of lengths of the data items to be inlined, if possible
+ * @inl_mask: 32-bit mask with bit x = 1 if data item x can be inlined, 0
+ *            otherwise.
+ * @count: Number of data items (size of @data_len array); must be <= 32
+ *
+ * Return: 0 if data can be inlined / referenced, negative value if not. If 0,
+ *         check @inl_mask for details.
+ */
+static inline int desc_inline_query(unsigned int sd_base_len,
+                                   unsigned int jd_len, unsigned int *data_len,
+                                   u32 *inl_mask, unsigned int count)
+{
+       int rem_bytes = (int)(CAAM_DESC_BYTES_MAX - sd_base_len - jd_len);
+       unsigned int i;
+
+       *inl_mask = 0;
+       for (i = 0; (i < count) && (rem_bytes > 0); i++) {
+               if (rem_bytes - (int)(data_len[i] +
+                       (count - i - 1) * CAAM_PTR_SZ) >= 0) {
+                       rem_bytes -= data_len[i];
+                       *inl_mask |= (1 << i);
+               } else {
+                       rem_bytes -= CAAM_PTR_SZ;
+               }
+       }
+
+       return (rem_bytes >= 0) ? 0 : -1;
+}
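
A usage sketch for struct alginfo and desc_inline_query(), not part of the patch: deciding whether the padded split authentication key and the cipher key can live inline in an AEAD encrypt shared descriptor. DESC_AEAD_ENC_LEN comes from caamalg_desc.h above; the job-descriptor budget of 5 * CAAM_CMD_SZ is an assumed placeholder, and example_query_keys() is hypothetical.

static int example_query_keys(struct alginfo *cdata, struct alginfo *adata)
{
        unsigned int data_len[2] = { adata->keylen_pad, cdata->keylen };
        u32 inl_mask;

        if (desc_inline_query(DESC_AEAD_ENC_LEN,
                              5 * CAAM_CMD_SZ,  /* assumed job desc. budget */
                              data_len, &inl_mask, 2) < 0)
                return -EINVAL;

        /* Bit i of inl_mask says whether data item i fits inline */
        adata->key_inline = !!(inl_mask & (1 << 0));
        cdata->key_inline = !!(inl_mask & (1 << 1));

        return 0;
}
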
index 33e41ea..79a0cc7 100644 (file)
@@ -146,10 +146,9 @@ static void report_ccb_status(struct device *jrdev, const u32 status,
            strlen(rng_err_id_list[err_id])) {
                /* RNG-only error */
                err_str = rng_err_id_list[err_id];
-       } else if (err_id < ARRAY_SIZE(err_id_list))
+       } else {
                err_str = err_id_list[err_id];
-       else
-               snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id);
+       }
 
        /*
         * CCB ICV check failures are part of normal operation life;
index 5d4c050..e2bcacc 100644 (file)
@@ -41,6 +41,7 @@ struct caam_drv_private_jr {
        struct device           *dev;
        int ridx;
        struct caam_job_ring __iomem *rregs;    /* JobR's register space */
+       struct tasklet_struct irqtask;
        int irq;                        /* One per queue */
 
        /* Number of scatterlist crypt transforms active on the JobR */
index 757c27f..c8604df 100644 (file)
@@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev)
 
        ret = caam_reset_hw_jr(dev);
 
+       tasklet_kill(&jrp->irqtask);
+
        /* Release interrupt */
        free_irq(jrp->irq, dev);
 
@@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
 
        /*
         * Check the output ring for ready responses, kick
-        * the threaded irq if jobs done.
+        * tasklet if jobs done.
         */
        irqstate = rd_reg32(&jrp->rregs->jrintstatus);
        if (!irqstate)
@@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev)
        /* Have valid interrupt at this point, just ACK and trigger */
        wr_reg32(&jrp->rregs->jrintstatus, irqstate);
 
-       return IRQ_WAKE_THREAD;
+       preempt_disable();
+       tasklet_schedule(&jrp->irqtask);
+       preempt_enable();
+
+       return IRQ_HANDLED;
 }
 
-static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
+/* Deferred service handler, run as interrupt-fired tasklet */
+static void caam_jr_dequeue(unsigned long devarg)
 {
        int hw_idx, sw_idx, i, head, tail;
-       struct device *dev = st_dev;
+       struct device *dev = (struct device *)devarg;
        struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
        void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
        u32 *userdesc, userstatus;
@@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev)
 
        /* reenable / unmask IRQs */
        clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0);
-
-       return IRQ_HANDLED;
 }
 
 /**
@@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev)
 
        jrp = dev_get_drvdata(dev);
 
+       tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
+
        /* Connect job ring interrupt handler. */
-       error = request_threaded_irq(jrp->irq, caam_jr_interrupt,
-                                    caam_jr_threadirq, IRQF_SHARED,
-                                    dev_name(dev), dev);
+       error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
+                           dev_name(dev), dev);
        if (error) {
                dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
                        jrp->ridx, jrp->irq);
@@ -454,6 +460,7 @@ out_free_inpring:
 out_free_irq:
        free_irq(jrp->irq, dev);
 out_kill_deq:
+       tasklet_kill(&jrp->irqtask);
        return error;
 }
 
@@ -489,7 +496,7 @@ static int caam_jr_probe(struct platform_device *pdev)
                return -ENOMEM;
        }
 
-       jrpriv->rregs = (struct caam_job_ring __force *)ctrl;
+       jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl;
 
        if (sizeof(dma_addr_t) == sizeof(u64))
                if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring"))
index e1eaf4f..1bb2816 100644 (file)
 #include "desc_constr.h"
 #include "key_gen.h"
 
+/**
+ * split_key_len - Compute MDHA split key length for a given algorithm
+ * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1,
+ *        SHA224, SHA256, SHA384, SHA512.
+ *
+ * Return: MDHA split key length
+ */
+static inline u32 split_key_len(u32 hash)
+{
+       /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */
+       static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 };
+       u32 idx;
+
+       idx = (hash & OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT;
+
+       return (u32)(mdpadlen[idx] * 2);
+}
+
+/**
+ * split_key_pad_len - Compute MDHA split key pad length for a given algorithm
+ * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1,
+ *        SHA224, SHA256, SHA384, SHA512.
+ *
+ * Return: MDHA split key pad length
+ */
+static inline u32 split_key_pad_len(u32 hash)
+{
+       return ALIGN(split_key_len(hash), 16);
+}
+
 void split_key_done(struct device *dev, u32 *desc, u32 err,
                           void *context)
 {
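
A worked example for the two helpers above, derived from the mdpadlen[] table (illustrative, not part of the patch):

/*
 * For OP_ALG_ALGSEL_SHA256 the mdpadlen[] entry is 32, so split_key_len()
 * returns 2 * 32 = 64 and split_key_pad_len() returns ALIGN(64, 16) = 64.
 * For MD5 the entry is 16, giving 32 and 32 respectively.
 */
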
@@ -41,15 +71,29 @@ Split key generation-----------------------------------------------
 [06] 0x64260028    fifostr: class2 mdsplit-jdk len=40
                        @0xffe04000
 */
-int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
-                 int split_key_pad_len, const u8 *key_in, u32 keylen,
-                 u32 alg_op)
+int gen_split_key(struct device *jrdev, u8 *key_out,
+                 struct alginfo * const adata, const u8 *key_in, u32 keylen,
+                 int max_keylen)
 {
        u32 *desc;
        struct split_key_result result;
        dma_addr_t dma_addr_in, dma_addr_out;
        int ret = -ENOMEM;
 
+       adata->keylen = split_key_len(adata->algtype & OP_ALG_ALGSEL_MASK);
+       adata->keylen_pad = split_key_pad_len(adata->algtype &
+                                             OP_ALG_ALGSEL_MASK);
+
+#ifdef DEBUG
+       dev_err(jrdev, "split keylen %d split keylen padded %d\n",
+               adata->keylen, adata->keylen_pad);
+       print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ",
+                      DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1);
+#endif
+
+       if (adata->keylen_pad > max_keylen)
+               return -EINVAL;
+
        desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
        if (!desc) {
                dev_err(jrdev, "unable to allocate key input memory\n");
@@ -63,7 +107,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
                goto out_free;
        }
 
-       dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len,
+       dma_addr_out = dma_map_single(jrdev, key_out, adata->keylen_pad,
                                      DMA_FROM_DEVICE);
        if (dma_mapping_error(jrdev, dma_addr_out)) {
                dev_err(jrdev, "unable to map key output memory\n");
@@ -74,7 +118,9 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
        append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG);
 
        /* Sets MDHA up into an HMAC-INIT */
-       append_operation(desc, alg_op | OP_ALG_DECRYPT | OP_ALG_AS_INIT);
+       append_operation(desc, (adata->algtype & OP_ALG_ALGSEL_MASK) |
+                        OP_ALG_AAI_HMAC | OP_TYPE_CLASS2_ALG | OP_ALG_DECRYPT |
+                        OP_ALG_AS_INIT);
 
        /*
         * do a FIFO_LOAD of zero, this will trigger the internal key expansion
@@ -87,7 +133,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
         * FIFO_STORE with the explicit split-key content store
         * (0x26 output type)
         */
-       append_fifo_store(desc, dma_addr_out, split_key_len,
+       append_fifo_store(desc, dma_addr_out, adata->keylen,
                          LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK);
 
 #ifdef DEBUG
@@ -108,11 +154,11 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
 #ifdef DEBUG
                print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ",
                               DUMP_PREFIX_ADDRESS, 16, 4, key_out,
-                              split_key_pad_len, 1);
+                              adata->keylen_pad, 1);
 #endif
        }
 
-       dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len,
+       dma_unmap_single(jrdev, dma_addr_out, adata->keylen_pad,
                         DMA_FROM_DEVICE);
 out_unmap_in:
        dma_unmap_single(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE);
index c5588f6..4628f38 100644 (file)
@@ -12,6 +12,6 @@ struct split_key_result {
 
 void split_key_done(struct device *dev, u32 *desc, u32 err, void *context);
 
-int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len,
-                   int split_key_pad_len, const u8 *key_in, u32 keylen,
-                   u32 alg_op);
+int gen_split_key(struct device *jrdev, u8 *key_out,
+                 struct alginfo * const adata, const u8 *key_in, u32 keylen,
+                 int max_keylen);
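
A sketch of a caller of the new gen_split_key() interface, mirroring the caamhash.c hunk earlier in this series; example_split_key() and the choice of SHA-256 are illustrative, and key_out is assumed to hold at least CAAM_MAX_HASH_KEY_SIZE bytes.

static int example_split_key(struct device *jrdev, u8 *key_out,
                             struct alginfo *adata,
                             const u8 *key_in, u32 keylen)
{
        /* Caller selects the hash; keylen/keylen_pad are filled in for it */
        adata->algtype = OP_TYPE_CLASS2_ALG | OP_ALG_ALGSEL_SHA256;

        return gen_split_key(jrdev, key_out, adata, key_in, keylen,
                             CAAM_MAX_HASH_KEY_SIZE);
}
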
index 41cd5a3..6afa20c 100644 (file)
@@ -7,7 +7,11 @@
 
 #include "regs.h"
 
-struct sec4_sg_entry;
+struct sec4_sg_entry {
+       u64 ptr;
+       u32 len;
+       u32 bpid_offset;
+};
 
 /*
  * convert single dma address to h/w link table format
index 8d2dbac..7bc0998 100644 (file)
@@ -404,10 +404,6 @@ static int ccp_init(struct ccp_device *ccp)
                goto e_pool;
        }
 
-       /* Initialize the queues used to wait for KSB space and suspend */
-       init_waitqueue_head(&ccp->sb_queue);
-       init_waitqueue_head(&ccp->suspend_queue);
-
        dev_dbg(dev, "Starting threads...\n");
        /* Create a kthread for each queue */
        for (i = 0; i < ccp->cmd_q_count; i++) {
index faf3cb3..e2ce819 100644
 
 #include "ccp-dev.h"
 
+/* Allocate the requested number of contiguous LSB slots
+ * from the LSB bitmap. Look in the private range for this
+ * queue first; failing that, check the public area.
+ * If no space is available, wait around.
+ * Return: first slot number
+ */
 static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
 {
        struct ccp_device *ccp;
@@ -50,7 +56,7 @@ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
                        bitmap_set(ccp->lsbmap, start, count);
 
                        mutex_unlock(&ccp->sb_mutex);
-                       return start * LSB_ITEM_SIZE;
+                       return start;
                }
 
                ccp->sb_avail = 0;
@@ -63,17 +69,18 @@ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
        }
 }
 
+/* Free a number of LSB slots from the bitmap, starting at
+ * the indicated starting slot number.
+ */
 static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
                         unsigned int count)
 {
-       int lsbno = start / LSB_SIZE;
-
        if (!start)
                return;
 
-       if (cmd_q->lsb == lsbno) {
+       if (cmd_q->lsb == start) {
                /* An entry from the private LSB */
-               bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
+               bitmap_clear(cmd_q->lsbmap, start, count);
        } else {
                /* From the shared LSBs */
                struct ccp_device *ccp = cmd_q->ccp;
@@ -396,7 +403,7 @@ static int ccp5_perform_rsa(struct ccp_op *op)
        CCP5_CMD_PROT(&desc) = 0;
 
        function.raw = 0;
-       CCP_RSA_SIZE(&function) = op->u.rsa.mod_size;
+       CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
        CCP5_CMD_FUNCTION(&desc) = function.raw;
 
        CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
@@ -411,10 +418,10 @@ static int ccp5_perform_rsa(struct ccp_op *op)
        CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
        CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
 
-       /* Key (Exponent) is in external memory */
-       CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma);
-       CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma);
-       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+       /* Exponent is in LSB memory */
+       CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
+       CCP5_CMD_KEY_HI(&desc) = 0;
+       CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
 
        return ccp5_do_cmd(&desc, op->cmd_q);
 }
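
The v5 RSA descriptor wants the modulus length in bytes and the exponent staged in local storage, so the key pointer becomes an LSB byte offset rather than a system DMA address. A short sketch of the two conversions, assuming mod_size is carried in bits as the old field usage implied:

	/* Sketch only: size and key-offset fields for a CCP5 RSA descriptor. */
	u32 rsa_len_bytes = op->u.rsa.mod_size >> 3;      /* bits -> bytes */
	u32 key_lsb_off   = op->sb_key * LSB_ITEM_SIZE;   /* slot index -> byte offset */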
@@ -751,9 +758,6 @@ static int ccp5_init(struct ccp_device *ccp)
                goto e_pool;
        }
 
-       /* Initialize the queue used to suspend */
-       init_waitqueue_head(&ccp->suspend_queue);
-
        dev_dbg(dev, "Loading LSB map...\n");
        /* Copy the private LSB mask to the public registers */
        status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
index cafa633..511ab04 100644
@@ -41,7 +41,7 @@ struct ccp_tasklet_data {
 };
 
 /* Human-readable error strings */
-char *ccp_error_codes[] = {
+static char *ccp_error_codes[] = {
        "",
        "ERR 01: ILLEGAL_ENGINE",
        "ERR 02: ILLEGAL_KEY_ID",
@@ -478,6 +478,10 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
        ccp->sb_count = KSB_COUNT;
        ccp->sb_start = 0;
 
+       /* Initialize the wait queues */
+       init_waitqueue_head(&ccp->sb_queue);
+       init_waitqueue_head(&ccp->suspend_queue);
+
        ccp->ord = ccp_increment_unit_ordinal();
        snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord);
        snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord);
index da5f4a6..830f35e 100644
@@ -278,7 +278,7 @@ struct ccp_cmd_queue {
        /* Private LSB that is assigned to this queue, or -1 if none.
         * Bitmap for my private LSB, unused otherwise
         */
-       unsigned int lsb;
+       int lsb;
        DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE);
 
        /* Queue processing thread */
@@ -515,7 +515,6 @@ struct ccp_op {
                struct ccp_passthru_op passthru;
                struct ccp_ecc_op ecc;
        } u;
-       struct ccp_mem key;
 };
 
 static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
@@ -541,23 +540,23 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info)
  * word 7: upper 16 bits of key pointer; key memory type
  */
 struct dword0 {
-       __le32 soc:1;
-       __le32 ioc:1;
-       __le32 rsvd1:1;
-       __le32 init:1;
-       __le32 eom:1;           /* AES/SHA only */
-       __le32 function:15;
-       __le32 engine:4;
-       __le32 prot:1;
-       __le32 rsvd2:7;
+       unsigned int soc:1;
+       unsigned int ioc:1;
+       unsigned int rsvd1:1;
+       unsigned int init:1;
+       unsigned int eom:1;             /* AES/SHA only */
+       unsigned int function:15;
+       unsigned int engine:4;
+       unsigned int prot:1;
+       unsigned int rsvd2:7;
 };
 
 struct dword3 {
-       __le32 src_hi:16;
-       __le32 src_mem:2;
-       __le32 lsb_cxt_id:8;
-       __le32 rsvd1:5;
-       __le32 fixed:1;
+       unsigned int  src_hi:16;
+       unsigned int  src_mem:2;
+       unsigned int  lsb_cxt_id:8;
+       unsigned int  rsvd1:5;
+       unsigned int  fixed:1;
 };
 
 union dword4 {
@@ -567,18 +566,18 @@ union dword4 {
 
 union dword5 {
        struct {
-               __le32 dst_hi:16;
-               __le32 dst_mem:2;
-               __le32 rsvd1:13;
-               __le32 fixed:1;
+               unsigned int  dst_hi:16;
+               unsigned int  dst_mem:2;
+               unsigned int  rsvd1:13;
+               unsigned int  fixed:1;
        } fields;
        __le32 sha_len_hi;
 };
 
 struct dword7 {
-       __le32 key_hi:16;
-       __le32 key_mem:2;
-       __le32 rsvd1:14;
+       unsigned int  key_hi:16;
+       unsigned int  key_mem:2;
+       unsigned int  rsvd1:14;
 };
 
 struct ccp5_desc {
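
Bit-fields cannot carry an endianness type: tagging them __le32 never byte-swaps anything and only trips sparse, so the members become plain unsigned int and any conversion happens once, when the assembled 32-bit word is stored. A minimal sketch of that pattern; the union wrapper, the engine value and the MMIO pointer are assumptions, not this driver's code:

	/* Sketch: build dword0 in host order, convert once on the way out. */
	void __iomem *cmd_q_mmio;	/* assumed queue register base */
	union {
		struct dword0 fields;
		u32 word;
	} d0 = {
		.fields = { .ioc = 1, .engine = 2 /* engine id assumed */ },
	};

	writel(d0.word, cmd_q_mmio);	/* writel() performs the cpu -> LE store */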
index 4ce67fb..3e104f5 100644
@@ -4,6 +4,7 @@ config CRYPTO_DEV_CHELSIO
        select CRYPTO_SHA1
        select CRYPTO_SHA256
        select CRYPTO_SHA512
+       select CRYPTO_AUTHENC
        ---help---
          The Chelsio Crypto Co-processor driver for T6 adapters.
 
index 56b1538..2ed1e24 100644
 #include <crypto/algapi.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
+#include <crypto/authenc.h>
+#include <crypto/internal/aead.h>
+#include <crypto/null.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aead.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/internal/hash.h>
 
 #include "t4fw_api.h"
 #include "chcr_algo.h"
 #include "chcr_crypto.h"
 
+static inline  struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx)
+{
+       return ctx->crypto_ctx->aeadctx;
+}
+
 static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx)
 {
        return ctx->crypto_ctx->ablkctx;
@@ -72,6 +83,16 @@ static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx)
        return ctx->crypto_ctx->hmacctx;
 }
 
+static inline struct chcr_gcm_ctx *GCM_CTX(struct chcr_aead_ctx *gctx)
+{
+       return gctx->ctx->gcm;
+}
+
+static inline struct chcr_authenc_ctx *AUTHENC_CTX(struct chcr_aead_ctx *gctx)
+{
+       return gctx->ctx->authenc;
+}
+
 static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx)
 {
        return ctx->dev->u_ctx;
@@ -94,12 +115,37 @@ static inline unsigned int sgl_len(unsigned int n)
        return (3 * n) / 2 + (n & 1) + 2;
 }
 
+static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
+{
+       u8 temp[SHA512_DIGEST_SIZE];
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       int authsize = crypto_aead_authsize(tfm);
+       struct cpl_fw6_pld *fw6_pld;
+       int cmp = 0;
+
+       fw6_pld = (struct cpl_fw6_pld *)input;
+       if ((get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) ||
+           (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_GCM)) {
+               cmp = memcmp(&fw6_pld->data[2], (fw6_pld + 1), authsize);
+       } else {
+
+               sg_pcopy_to_buffer(req->src, sg_nents(req->src), temp,
+                               authsize, req->assoclen +
+                               req->cryptlen - authsize);
+               cmp = memcmp(temp, (fw6_pld + 1), authsize);
+       }
+       if (cmp)
+               *err = -EBADMSG;
+       else
+               *err = 0;
+}
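
When the hardware cannot verify the ICV itself, the tag is checked in software against either the FW6_PLD immediate data or the tail of the source scatterlist. memcmp() is not constant-time; a hedged alternative using the kernel's crypto_memneq() from <crypto/algapi.h> would look like the snippet below (a suggestion, not what this driver does):

	sg_pcopy_to_buffer(req->src, sg_nents(req->src), temp, authsize,
			   req->assoclen + req->cryptlen - authsize);
	*err = crypto_memneq(temp, fw6_pld + 1, authsize) ? -EBADMSG : 0;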
+
 /*
  *     chcr_handle_resp - Unmap the DMA buffers associated with the request
  *     @req: crypto request
  */
 int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
-                    int error_status)
+                        int err)
 {
        struct crypto_tfm *tfm = req->tfm;
        struct chcr_context *ctx = crypto_tfm_ctx(tfm);
@@ -109,17 +155,33 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
        unsigned int digestsize, updated_digestsize;
 
        switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+       case CRYPTO_ALG_TYPE_AEAD:
+               ctx_req.req.aead_req = (struct aead_request *)req;
+               ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req);
+               dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.aead_req->dst,
+                            ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE);
+               if (ctx_req.ctx.reqctx->skb) {
+                       kfree_skb(ctx_req.ctx.reqctx->skb);
+                       ctx_req.ctx.reqctx->skb = NULL;
+               }
+               if (ctx_req.ctx.reqctx->verify == VERIFY_SW) {
+                       chcr_verify_tag(ctx_req.req.aead_req, input,
+                                       &err);
+                       ctx_req.ctx.reqctx->verify = VERIFY_HW;
+               }
+               break;
+
        case CRYPTO_ALG_TYPE_BLKCIPHER:
                ctx_req.req.ablk_req = (struct ablkcipher_request *)req;
                ctx_req.ctx.ablk_ctx =
                        ablkcipher_request_ctx(ctx_req.req.ablk_req);
-               if (!error_status) {
+               if (!err) {
                        fw6_pld = (struct cpl_fw6_pld *)input;
                        memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2],
                               AES_BLOCK_SIZE);
                }
                dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst,
-                            ABLK_CTX(ctx)->dst_nents, DMA_FROM_DEVICE);
+                            ctx_req.ctx.ablk_ctx->dst_nents, DMA_FROM_DEVICE);
                if (ctx_req.ctx.ablk_ctx->skb) {
                        kfree_skb(ctx_req.ctx.ablk_ctx->skb);
                        ctx_req.ctx.ablk_ctx->skb = NULL;
@@ -138,8 +200,10 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
                        updated_digestsize = SHA256_DIGEST_SIZE;
                else if (digestsize == SHA384_DIGEST_SIZE)
                        updated_digestsize = SHA512_DIGEST_SIZE;
-               if (ctx_req.ctx.ahash_ctx->skb)
+               if (ctx_req.ctx.ahash_ctx->skb) {
+                       kfree_skb(ctx_req.ctx.ahash_ctx->skb);
                        ctx_req.ctx.ahash_ctx->skb = NULL;
+               }
                if (ctx_req.ctx.ahash_ctx->result == 1) {
                        ctx_req.ctx.ahash_ctx->result = 0;
                        memcpy(ctx_req.req.ahash_req->result, input +
@@ -150,11 +214,9 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
                               sizeof(struct cpl_fw6_pld),
                               updated_digestsize);
                }
-               kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr);
-               ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL;
                break;
        }
-       return 0;
+       return err;
 }
 
 /*
@@ -178,40 +240,81 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
        return flits + sgl_len(cnt);
 }
 
-static struct shash_desc *chcr_alloc_shash(unsigned int ds)
+static inline void get_aes_decrypt_key(unsigned char *dec_key,
+                                      const unsigned char *key,
+                                      unsigned int keylength)
+{
+       u32 temp;
+       u32 w_ring[MAX_NK];
+       int i, j, k;
+       u8  nr, nk;
+
+       switch (keylength) {
+       case AES_KEYLENGTH_128BIT:
+               nk = KEYLENGTH_4BYTES;
+               nr = NUMBER_OF_ROUNDS_10;
+               break;
+       case AES_KEYLENGTH_192BIT:
+               nk = KEYLENGTH_6BYTES;
+               nr = NUMBER_OF_ROUNDS_12;
+               break;
+       case AES_KEYLENGTH_256BIT:
+               nk = KEYLENGTH_8BYTES;
+               nr = NUMBER_OF_ROUNDS_14;
+               break;
+       default:
+               return;
+       }
+       for (i = 0; i < nk; i++)
+               w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]);
+
+       i = 0;
+       temp = w_ring[nk - 1];
+       while (i + nk < (nr + 1) * 4) {
+               if (!(i % nk)) {
+                       /* RotWord(temp) */
+                       temp = (temp << 8) | (temp >> 24);
+                       temp = aes_ks_subword(temp);
+                       temp ^= round_constant[i / nk];
+               } else if (nk == 8 && (i % 4 == 0)) {
+                       temp = aes_ks_subword(temp);
+               }
+               w_ring[i % nk] ^= temp;
+               temp = w_ring[i % nk];
+               i++;
+       }
+       i--;
+       for (k = 0, j = i % nk; k < nk; k++) {
+               *((u32 *)dec_key + k) = htonl(w_ring[j]);
+               j--;
+               if (j < 0)
+                       j += nk;
+       }
+}
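
The helper runs the AES key schedule in software and hands back the round keys in the reversed order the Chelsio engine expects for decryption. A hedged usage sketch mirroring the setkey paths further down; the buffer size is an assumption:

	/* Sketch: precompute the reversed round key at setkey time. */
	u8 rrkey[AES_MAX_KEY_SIZE];

	get_aes_decrypt_key(rrkey, key, keylen << 3);	/* keylen in bytes -> bits */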
+
+static struct crypto_shash *chcr_alloc_shash(unsigned int ds)
 {
        struct crypto_shash *base_hash = NULL;
-       struct shash_desc *desc;
 
        switch (ds) {
        case SHA1_DIGEST_SIZE:
-               base_hash = crypto_alloc_shash("sha1-generic", 0, 0);
+               base_hash = crypto_alloc_shash("sha1", 0, 0);
                break;
        case SHA224_DIGEST_SIZE:
-               base_hash = crypto_alloc_shash("sha224-generic", 0, 0);
+               base_hash = crypto_alloc_shash("sha224", 0, 0);
                break;
        case SHA256_DIGEST_SIZE:
-               base_hash = crypto_alloc_shash("sha256-generic", 0, 0);
+               base_hash = crypto_alloc_shash("sha256", 0, 0);
                break;
        case SHA384_DIGEST_SIZE:
-               base_hash = crypto_alloc_shash("sha384-generic", 0, 0);
+               base_hash = crypto_alloc_shash("sha384", 0, 0);
                break;
        case SHA512_DIGEST_SIZE:
-               base_hash = crypto_alloc_shash("sha512-generic", 0, 0);
+               base_hash = crypto_alloc_shash("sha512", 0, 0);
                break;
        }
-       if (IS_ERR(base_hash)) {
-               pr_err("Can not allocate sha-generic algo.\n");
-               return (void *)base_hash;
-       }
 
-       desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash),
-                      GFP_KERNEL);
-       if (!desc)
-               return ERR_PTR(-ENOMEM);
-       desc->tfm = base_hash;
-       desc->flags = crypto_shash_get_flags(base_hash);
-       return desc;
+       return base_hash;
 }
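
Asking for "sha1" instead of "sha1-generic" lets the crypto API pick the highest-priority implementation available, and returning the bare crypto_shash moves descriptor setup onto the caller's stack (see the SHASH_DESC_ON_STACK sketch further down). A minimal caller sketch with the error handling that now lives at the call site:

	struct crypto_shash *hash;

	hash = chcr_alloc_shash(SHA256_DIGEST_SIZE);
	if (IS_ERR_OR_NULL(hash))	/* NULL for unhandled sizes, ERR_PTR on alloc failure */
		return hash ? PTR_ERR(hash) : -EINVAL;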
 
 static int chcr_compute_partial_hash(struct shash_desc *desc,
@@ -279,31 +382,18 @@ static inline int is_hmac(struct crypto_tfm *tfm)
        struct chcr_alg_template *chcr_crypto_alg =
                container_of(__crypto_ahash_alg(alg), struct chcr_alg_template,
                             alg.hash);
-       if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) ==
-           CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
+       if (chcr_crypto_alg->type == CRYPTO_ALG_TYPE_HMAC)
                return 1;
        return 0;
 }
 
-static inline unsigned int ch_nents(struct scatterlist *sg,
-                                   unsigned int *total_size)
-{
-       unsigned int nents;
-
-       for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) {
-               nents++;
-               *total_size += sg->length;
-       }
-       return nents;
-}
-
 static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
                           struct scatterlist *sg,
                           struct phys_sge_parm *sg_param)
 {
        struct phys_sge_pairs *to;
-       unsigned int out_buf_size = sg_param->obsize;
-       unsigned int nents = sg_param->nents, i, j, tot_len = 0;
+       int out_buf_size = sg_param->obsize;
+       unsigned int nents = sg_param->nents, i, j = 0;
 
        phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL)
                                    | CPL_RX_PHYS_DSGL_ISRDMA_V(0));
@@ -321,25 +411,24 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
                                       sizeof(struct cpl_rx_phys_dsgl));
 
        for (i = 0; nents; to++) {
-               for (j = i; (nents && (j < (8 + i))); j++, nents--) {
-                       to->len[j] = htons(sg->length);
+               for (j = 0; j < 8 && nents; j++, nents--) {
+                       out_buf_size -= sg_dma_len(sg);
+                       to->len[j] = htons(sg_dma_len(sg));
                        to->addr[j] = cpu_to_be64(sg_dma_address(sg));
-                       if (out_buf_size) {
-                               if (tot_len + sg_dma_len(sg) >= out_buf_size) {
-                                       to->len[j] = htons(out_buf_size -
-                                                          tot_len);
-                                       return;
-                               }
-                               tot_len += sg_dma_len(sg);
-                       }
                        sg = sg_next(sg);
                }
        }
+       if (out_buf_size) {
+               j--;
+               to--;
+               to->len[j] = htons(ntohs(to->len[j]) + (out_buf_size));
+       }
 }
 
-static inline unsigned
-int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl,
-                        struct scatterlist *sg, struct phys_sge_parm *sg_param)
+static inline int map_writesg_phys_cpl(struct device *dev,
+                                       struct cpl_rx_phys_dsgl *phys_cpl,
+                                       struct scatterlist *sg,
+                                       struct phys_sge_parm *sg_param)
 {
        if (!sg || !sg_param->nents)
                return 0;
@@ -353,6 +442,14 @@ int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl,
        return 0;
 }
 
+static inline int get_aead_subtype(struct crypto_aead *aead)
+{
+       struct aead_alg *alg = crypto_aead_alg(aead);
+       struct chcr_alg_template *chcr_crypto_alg =
+               container_of(alg, struct chcr_alg_template, alg.aead);
+       return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
+}
+
 static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
 {
        struct crypto_alg *alg = tfm->__crt_alg;
@@ -362,8 +459,23 @@ static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
        return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
 }
 
+static inline void write_buffer_to_skb(struct sk_buff *skb,
+                                       unsigned int *frags,
+                                       char *bfr,
+                                       u8 bfr_len)
+{
+       skb->len += bfr_len;
+       skb->data_len += bfr_len;
+       skb->truesize += bfr_len;
+       get_page(virt_to_page(bfr));
+       skb_fill_page_desc(skb, *frags, virt_to_page(bfr),
+                          offset_in_page(bfr), bfr_len);
+       (*frags)++;
+}
+
+
 static inline void
-write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags,
+write_sg_to_skb(struct sk_buff *skb, unsigned int *frags,
                        struct scatterlist *sg, unsigned int count)
 {
        struct page *spage;
@@ -372,8 +484,9 @@ write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags,
        skb->len += count;
        skb->data_len += count;
        skb->truesize += count;
+
        while (count > 0) {
-               if (sg && (!(sg->length)))
+               if (!sg || (!(sg->length)))
                        break;
                spage = sg_page(sg);
                get_page(spage);
@@ -389,29 +502,25 @@ static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
                               struct _key_ctx *key_ctx)
 {
        if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
-               get_aes_decrypt_key(key_ctx->key, ablkctx->key,
-                                   ablkctx->enckey_len << 3);
-               memset(key_ctx->key + ablkctx->enckey_len, 0,
-                      CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len);
+               memcpy(key_ctx->key, ablkctx->rrkey, ablkctx->enckey_len);
        } else {
                memcpy(key_ctx->key,
                       ablkctx->key + (ablkctx->enckey_len >> 1),
                       ablkctx->enckey_len >> 1);
-               get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1),
-                                   ablkctx->key, ablkctx->enckey_len << 2);
+               memcpy(key_ctx->key + (ablkctx->enckey_len >> 1),
+                      ablkctx->rrkey, ablkctx->enckey_len >> 1);
        }
        return 0;
 }
 
 static inline void create_wreq(struct chcr_context *ctx,
-                              struct fw_crypto_lookaside_wr *wreq,
+                              struct chcr_wr *chcr_req,
                               void *req, struct sk_buff *skb,
                               int kctx_len, int hash_sz,
-                              unsigned int phys_dsgl)
+                              int is_iv,
+                              unsigned int sc_len)
 {
        struct uld_ctx *u_ctx = ULD_CTX(ctx);
-       struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1);
-       struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1);
        int iv_loc = IV_DSGL;
        int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id];
        unsigned int immdatalen = 0, nr_frags = 0;
@@ -423,27 +532,27 @@ static inline void create_wreq(struct chcr_context *ctx,
                nr_frags = skb_shinfo(skb)->nr_frags;
        }
 
-       wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen,
-                                                    (kctx_len >> 4));
-       wreq->pld_size_hash_size =
+       chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen,
+                               ((sizeof(chcr_req->key_ctx) + kctx_len) >> 4));
+       chcr_req->wreq.pld_size_hash_size =
                htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) |
                      FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz));
-       wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(
+       chcr_req->wreq.len16_pkd =
+               htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(
                                    (calc_tx_flits_ofld(skb) * 8), 16)));
-       wreq->cookie = cpu_to_be64((uintptr_t)req);
-       wreq->rx_chid_to_rx_q_id =
+       chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req);
+       chcr_req->wreq.rx_chid_to_rx_q_id =
                FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid,
-                               (hash_sz) ? IV_NOP : iv_loc);
+                               is_iv ? iv_loc : IV_NOP);
 
-       ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id);
-       ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8),
-                                        16) - ((sizeof(*wreq)) >> 4)));
+       chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id);
+       chcr_req->ulptx.len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8),
+                                       16) - ((sizeof(chcr_req->wreq)) >> 4)));
 
-       sc_imm->cmd_more = FILL_CMD_MORE(immdatalen);
-       sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len +
-                                 ((hash_sz) ? DUMMY_BYTES :
-                                 (sizeof(struct cpl_rx_phys_dsgl) +
-                                  phys_dsgl)) + immdatalen);
+       chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(immdatalen);
+       chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) +
+                                  sizeof(chcr_req->key_ctx) +
+                                  kctx_len + sc_len + immdatalen);
 }
 
 /**
@@ -454,86 +563,83 @@ static inline void create_wreq(struct chcr_context *ctx,
  *     @op_type:       encryption or decryption
  */
 static struct sk_buff
-*create_cipher_wr(struct crypto_async_request *req_base,
-                 struct chcr_context *ctx, unsigned short qid,
+*create_cipher_wr(struct ablkcipher_request *req,
+                 unsigned short qid,
                  unsigned short op_type)
 {
-       struct ablkcipher_request *req = (struct ablkcipher_request *)req_base;
        struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+       struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
        struct uld_ctx *u_ctx = ULD_CTX(ctx);
        struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
        struct sk_buff *skb = NULL;
-       struct _key_ctx *key_ctx;
-       struct fw_crypto_lookaside_wr *wreq;
-       struct cpl_tx_sec_pdu *sec_cpl;
+       struct chcr_wr *chcr_req;
        struct cpl_rx_phys_dsgl *phys_cpl;
-       struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req);
+       struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
        struct phys_sge_parm sg_param;
-       unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0;
+       unsigned int frags = 0, transhdr_len, phys_dsgl;
        unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len;
+       gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+                       GFP_ATOMIC;
 
        if (!req->info)
                return ERR_PTR(-EINVAL);
-       ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize);
-       ablkctx->enc = op_type;
-
+       reqctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes);
+       if (reqctx->dst_nents <= 0) {
+               pr_err("AES:Invalid Destination sg lists\n");
+               return ERR_PTR(-EINVAL);
+       }
        if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
-           (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE))
+           (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) {
+               pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
+                      ablkctx->enckey_len, req->nbytes, ivsize);
                return ERR_PTR(-EINVAL);
+       }
 
-       phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents);
+       phys_dsgl = get_space_for_phys_dsgl(reqctx->dst_nents);
 
-       kctx_len = sizeof(*key_ctx) +
-               (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+       kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
        transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
-       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),
-                       GFP_ATOMIC);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
        if (!skb)
                return ERR_PTR(-ENOMEM);
        skb_reserve(skb, sizeof(struct sge_opaque_hdr));
-       wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len);
-
-       sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET);
-       sec_cpl->op_ivinsrtofst =
-               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1);
-
-       sec_cpl->pldlen = htonl(ivsize + req->nbytes);
-       sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0,
-                                                               ivsize + 1, 0);
-
-       sec_cpl->cipherstop_lo_authinsert =  FILL_SEC_CPL_AUTHINSERT(0, 0,
-                                                                    0, 0);
-       sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0,
+       chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
+       memset(chcr_req, 0, transhdr_len);
+       chcr_req->sec_cpl.op_ivinsrtofst =
+               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1);
+
+       chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes);
+       chcr_req->sec_cpl.aadstart_cipherstop_hi =
+                       FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, ivsize + 1, 0);
+
+       chcr_req->sec_cpl.cipherstop_lo_authinsert =
+                       FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0);
+       chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0,
                                                         ablkctx->ciph_mode,
-                                                        0, 0, ivsize >> 1, 1);
-       sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
+                                                        0, 0, ivsize >> 1);
+       chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
                                                          0, 1, phys_dsgl);
 
-       key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl));
-       key_ctx->ctx_hdr = ablkctx->key_ctx_hdr;
+       chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr;
        if (op_type == CHCR_DECRYPT_OP) {
-               if (generate_copy_rrkey(ablkctx, key_ctx))
-                       goto map_fail1;
+               generate_copy_rrkey(ablkctx, &chcr_req->key_ctx);
        } else {
                if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
-                       memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len);
+                       memcpy(chcr_req->key_ctx.key, ablkctx->key,
+                              ablkctx->enckey_len);
                } else {
-                       memcpy(key_ctx->key, ablkctx->key +
+                       memcpy(chcr_req->key_ctx.key, ablkctx->key +
                               (ablkctx->enckey_len >> 1),
                               ablkctx->enckey_len >> 1);
-                       memcpy(key_ctx->key +
+                       memcpy(chcr_req->key_ctx.key +
                               (ablkctx->enckey_len >> 1),
                               ablkctx->key,
                               ablkctx->enckey_len >> 1);
                }
        }
-       phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len);
-
-       memcpy(ablkctx->iv, req->info, ivsize);
-       sg_init_table(&ablkctx->iv_sg, 1);
-       sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize);
-       sg_param.nents = ablkctx->dst_nents;
-       sg_param.obsize = dst_bufsize;
+       phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
+       sg_param.nents = reqctx->dst_nents;
+       sg_param.obsize = req->nbytes;
        sg_param.qid = qid;
        sg_param.align = 1;
        if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst,
@@ -541,10 +647,12 @@ static struct sk_buff
                goto map_fail1;
 
        skb_set_transport_header(skb, transhdr_len);
-       write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize);
-       write_sg_data_page_desc(skb, &frags, req->src, req->nbytes);
-       create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl);
-       req_ctx->skb = skb;
+       memcpy(reqctx->iv, req->info, ivsize);
+       write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
+       write_sg_to_skb(skb, &frags, req->src, req->nbytes);
+       create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1,
+                       sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl);
+       reqctx->skb = skb;
        skb_get(skb);
        return skb;
 map_fail1:
@@ -557,15 +665,9 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 {
        struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
        struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
-       struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm);
        unsigned int ck_size, context_size;
        u16 alignment = 0;
 
-       if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize))
-               goto badkey_err;
-
-       memcpy(ablkctx->key, key, keylen);
-       ablkctx->enckey_len = keylen;
        if (keylen == AES_KEYSIZE_128) {
                ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
        } else if (keylen == AES_KEYSIZE_192) {
@@ -576,7 +678,9 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
        } else {
                goto badkey_err;
        }
-
+       memcpy(ablkctx->key, key, keylen);
+       ablkctx->enckey_len = keylen;
+       get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, keylen << 3);
        context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
                        keylen + alignment) >> 4;
 
@@ -612,7 +716,6 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req)
 {
        struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
        struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
-       struct crypto_async_request *req_base = &req->base;
        struct uld_ctx *u_ctx = ULD_CTX(ctx);
        struct sk_buff *skb;
 
@@ -622,8 +725,7 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req)
                        return -EBUSY;
        }
 
-       skb = create_cipher_wr(req_base, ctx,
-                              u_ctx->lldi.rxq_ids[ctx->tx_channel_id],
+       skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->tx_channel_id],
                               CHCR_ENCRYPT_OP);
        if (IS_ERR(skb)) {
                pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
@@ -639,7 +741,6 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
 {
        struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
        struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
-       struct crypto_async_request *req_base = &req->base;
        struct uld_ctx *u_ctx = ULD_CTX(ctx);
        struct sk_buff *skb;
 
@@ -649,7 +750,7 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req)
                        return -EBUSY;
        }
 
-       skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0],
+       skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[0],
                               CHCR_DECRYPT_OP);
        if (IS_ERR(skb)) {
                pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
@@ -729,50 +830,33 @@ static int get_alg_config(struct algo_param *params,
        return 0;
 }
 
-static inline int
-write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx,
-                           struct sk_buff *skb, unsigned int *frags, char *bfr,
-                           u8 bfr_len)
+static inline void chcr_free_shash(struct crypto_shash *base_hash)
 {
-       void *page_ptr = NULL;
-
-       skb->len += bfr_len;
-       skb->data_len += bfr_len;
-       skb->truesize += bfr_len;
-       page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA);
-       if (!page_ptr)
-               return -ENOMEM;
-       get_page(virt_to_page(page_ptr));
-       req_ctx->dummy_payload_ptr = page_ptr;
-       memcpy(page_ptr, bfr, bfr_len);
-       skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr),
-                          offset_in_page(page_ptr), bfr_len);
-       (*frags)++;
-       return 0;
+               crypto_free_shash(base_hash);
 }
 
 /**
- *     create_final_hash_wr - Create hash work request
+ *     create_hash_wr - Create hash work request
  *     @req - Cipher req base
  */
-static struct sk_buff *create_final_hash_wr(struct ahash_request *req,
-                                           struct hash_wr_param *param)
+static struct sk_buff *create_hash_wr(struct ahash_request *req,
+                                     struct hash_wr_param *param)
 {
        struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
        struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
        struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
        struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
        struct sk_buff *skb = NULL;
-       struct _key_ctx *key_ctx;
-       struct fw_crypto_lookaside_wr *wreq;
-       struct cpl_tx_sec_pdu *sec_cpl;
+       struct chcr_wr *chcr_req;
        unsigned int frags = 0, transhdr_len, iopad_alignment = 0;
        unsigned int digestsize = crypto_ahash_digestsize(tfm);
-       unsigned int kctx_len = sizeof(*key_ctx);
+       unsigned int kctx_len = 0;
        u8 hash_size_in_response = 0;
+       gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+               GFP_ATOMIC;
 
        iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
-       kctx_len += param->alg_prm.result_size + iopad_alignment;
+       kctx_len = param->alg_prm.result_size + iopad_alignment;
        if (param->opad_needed)
                kctx_len += param->alg_prm.result_size + iopad_alignment;
 
@@ -781,54 +865,54 @@ static struct sk_buff *create_final_hash_wr(struct ahash_request *req,
        else
                hash_size_in_response = param->alg_prm.result_size;
        transhdr_len = HASH_TRANSHDR_SIZE(kctx_len);
-       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),
-                       GFP_ATOMIC);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
        if (!skb)
                return skb;
 
        skb_reserve(skb, sizeof(struct sge_opaque_hdr));
-       wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len);
-       memset(wreq, 0, transhdr_len);
+       chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
+       memset(chcr_req, 0, transhdr_len);
 
-       sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET);
-       sec_cpl->op_ivinsrtofst =
-               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0);
-       sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len);
+       chcr_req->sec_cpl.op_ivinsrtofst =
+               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0);
+       chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len);
 
-       sec_cpl->aadstart_cipherstop_hi =
+       chcr_req->sec_cpl.aadstart_cipherstop_hi =
                FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0);
-       sec_cpl->cipherstop_lo_authinsert =
+       chcr_req->sec_cpl.cipherstop_lo_authinsert =
                FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0);
-       sec_cpl->seqno_numivs =
+       chcr_req->sec_cpl.seqno_numivs =
                FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode,
-                                        param->opad_needed, 0, 0);
+                                        param->opad_needed, 0);
 
-       sec_cpl->ivgen_hdrlen =
+       chcr_req->sec_cpl.ivgen_hdrlen =
                FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0);
 
-       key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl));
-       memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size);
+       memcpy(chcr_req->key_ctx.key, req_ctx->partial_hash,
+              param->alg_prm.result_size);
 
        if (param->opad_needed)
-               memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 :
-                                      CHCR_HASH_MAX_DIGEST_SIZE),
+               memcpy(chcr_req->key_ctx.key +
+                      ((param->alg_prm.result_size <= 32) ? 32 :
+                       CHCR_HASH_MAX_DIGEST_SIZE),
                       hmacctx->opad, param->alg_prm.result_size);
 
-       key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY,
+       chcr_req->key_ctx.ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY,
                                            param->alg_prm.mk_size, 0,
                                            param->opad_needed,
-                                           (kctx_len >> 4));
-       sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1);
+                                           ((kctx_len +
+                                            sizeof(chcr_req->key_ctx)) >> 4));
+       chcr_req->sec_cpl.scmd1 = cpu_to_be64((u64)param->scmd1);
 
        skb_set_transport_header(skb, transhdr_len);
        if (param->bfr_len != 0)
-               write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr,
-                                           param->bfr_len);
+               write_buffer_to_skb(skb, &frags, req_ctx->reqbfr,
+                                   param->bfr_len);
        if (param->sg_len != 0)
-               write_sg_data_page_desc(skb, &frags, req->src, param->sg_len);
+               write_sg_to_skb(skb, &frags, req->src, param->sg_len);
 
-       create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response,
-                   0);
+       create_wreq(ctx, chcr_req, req, skb, kctx_len, hash_size_in_response, 0,
+                       DUMMY_BYTES);
        req_ctx->skb = skb;
        skb_get(skb);
        return skb;
@@ -854,34 +938,40 @@ static int chcr_ahash_update(struct ahash_request *req)
                        return -EBUSY;
        }
 
-       if (nbytes + req_ctx->bfr_len >= bs) {
-               remainder = (nbytes + req_ctx->bfr_len) % bs;
-               nbytes = nbytes + req_ctx->bfr_len - remainder;
+       if (nbytes + req_ctx->reqlen >= bs) {
+               remainder = (nbytes + req_ctx->reqlen) % bs;
+               nbytes = nbytes + req_ctx->reqlen - remainder;
        } else {
-               sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr +
-                                  req_ctx->bfr_len, nbytes, 0);
-               req_ctx->bfr_len += nbytes;
+               sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->reqbfr
+                                  + req_ctx->reqlen, nbytes, 0);
+               req_ctx->reqlen += nbytes;
                return 0;
        }
 
        params.opad_needed = 0;
        params.more = 1;
        params.last = 0;
-       params.sg_len = nbytes - req_ctx->bfr_len;
-       params.bfr_len = req_ctx->bfr_len;
+       params.sg_len = nbytes - req_ctx->reqlen;
+       params.bfr_len = req_ctx->reqlen;
        params.scmd1 = 0;
        get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
        req_ctx->result = 0;
        req_ctx->data_len += params.sg_len + params.bfr_len;
-       skb = create_final_hash_wr(req, &params);
+       skb = create_hash_wr(req, &params);
        if (!skb)
                return -ENOMEM;
 
-       req_ctx->bfr_len = remainder;
-       if (remainder)
+       if (remainder) {
+               u8 *temp;
+               /* Swap buffers */
+               temp = req_ctx->reqbfr;
+               req_ctx->reqbfr = req_ctx->skbfr;
+               req_ctx->skbfr = temp;
                sg_pcopy_to_buffer(req->src, sg_nents(req->src),
-                                  req_ctx->bfr, remainder, req->nbytes -
+                                  req_ctx->reqbfr, remainder, req->nbytes -
                                   remainder);
+       }
+       req_ctx->reqlen = remainder;
        skb->dev = u_ctx->lldi.ports[0];
        set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
        chcr_send_wr(skb);
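
The two buffers alternate roles: reqbfr stages the partial block that software still owns, while the other buffer's page is referenced by the work-request skb that was just queued, so the leftover bytes can be copied without waiting for that request to complete. The swap in isolation, with names taken from the hunk above:

	/* Sketch: ping-pong the partial-block buffers between requests. */
	static void example_swap_hash_bfrs(struct chcr_ahash_req_ctx *req_ctx)
	{
		u8 *tmp = req_ctx->reqbfr;

		req_ctx->reqbfr = req_ctx->skbfr;  /* free buffer becomes the new staging area */
		req_ctx->skbfr  = tmp;             /* old staging buffer is now owned by the skb */
	}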
@@ -917,10 +1007,10 @@ static int chcr_ahash_final(struct ahash_request *req)
        params.sg_len = 0;
        get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
        req_ctx->result = 1;
-       params.bfr_len = req_ctx->bfr_len;
+       params.bfr_len = req_ctx->reqlen;
        req_ctx->data_len += params.bfr_len + params.sg_len;
-       if (req_ctx->bfr && (req_ctx->bfr_len == 0)) {
-               create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len);
+       if (req_ctx->reqlen == 0) {
+               create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len);
                params.last = 0;
                params.more = 1;
                params.scmd1 = 0;
@@ -931,7 +1021,10 @@ static int chcr_ahash_final(struct ahash_request *req)
                params.last = 1;
                params.more = 0;
        }
-       skb = create_final_hash_wr(req, &params);
+       skb = create_hash_wr(req, &params);
+       if (!skb)
+               return -ENOMEM;
+
        skb->dev = u_ctx->lldi.ports[0];
        set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
        chcr_send_wr(skb);
@@ -963,12 +1056,12 @@ static int chcr_ahash_finup(struct ahash_request *req)
                params.opad_needed = 0;
 
        params.sg_len = req->nbytes;
-       params.bfr_len = req_ctx->bfr_len;
+       params.bfr_len = req_ctx->reqlen;
        get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
        req_ctx->data_len += params.bfr_len + params.sg_len;
        req_ctx->result = 1;
-       if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) {
-               create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len);
+       if ((req_ctx->reqlen + req->nbytes) == 0) {
+               create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len);
                params.last = 0;
                params.more = 1;
                params.scmd1 = 0;
@@ -979,9 +1072,10 @@ static int chcr_ahash_finup(struct ahash_request *req)
                params.more = 0;
        }
 
-       skb = create_final_hash_wr(req, &params);
+       skb = create_hash_wr(req, &params);
        if (!skb)
                return -ENOMEM;
+
        skb->dev = u_ctx->lldi.ports[0];
        set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
        chcr_send_wr(skb);
@@ -1023,13 +1117,13 @@ static int chcr_ahash_digest(struct ahash_request *req)
        req_ctx->result = 1;
        req_ctx->data_len += params.bfr_len + params.sg_len;
 
-       if (req_ctx->bfr && req->nbytes == 0) {
-               create_last_hash_block(req_ctx->bfr, bs, 0);
+       if (req->nbytes == 0) {
+               create_last_hash_block(req_ctx->reqbfr, bs, 0);
                params.more = 1;
                params.bfr_len = bs;
        }
 
-       skb = create_final_hash_wr(req, &params);
+       skb = create_hash_wr(req, &params);
        if (!skb)
                return -ENOMEM;
 
@@ -1044,12 +1138,12 @@ static int chcr_ahash_export(struct ahash_request *areq, void *out)
        struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
        struct chcr_ahash_req_ctx *state = out;
 
-       state->bfr_len = req_ctx->bfr_len;
+       state->reqlen = req_ctx->reqlen;
        state->data_len = req_ctx->data_len;
-       memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128);
+       memcpy(state->bfr1, req_ctx->reqbfr, req_ctx->reqlen);
        memcpy(state->partial_hash, req_ctx->partial_hash,
               CHCR_HASH_MAX_DIGEST_SIZE);
-       return 0;
+               return 0;
 }
 
 static int chcr_ahash_import(struct ahash_request *areq, const void *in)
@@ -1057,10 +1151,11 @@ static int chcr_ahash_import(struct ahash_request *areq, const void *in)
        struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
        struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in;
 
-       req_ctx->bfr_len = state->bfr_len;
+       req_ctx->reqlen = state->reqlen;
        req_ctx->data_len = state->data_len;
-       req_ctx->dummy_payload_ptr = NULL;
-       memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128);
+       req_ctx->reqbfr = req_ctx->bfr1;
+       req_ctx->skbfr = req_ctx->bfr2;
+       memcpy(req_ctx->bfr1, state->bfr1, CHCR_HASH_MAX_BLOCK_SIZE_128);
        memcpy(req_ctx->partial_hash, state->partial_hash,
               CHCR_HASH_MAX_DIGEST_SIZE);
        return 0;
@@ -1075,15 +1170,16 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
        unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
        unsigned int i, err = 0, updated_digestsize;
 
-       /*
-        * use the key to calculate the ipad and opad. ipad will sent with the
+       SHASH_DESC_ON_STACK(shash, hmacctx->base_hash);
+
+       /* use the key to calculate the ipad and opad. ipad will sent with the
         * first request's data. opad will be sent with the final hash result
         * ipad in hmacctx->ipad and opad in hmacctx->opad location
         */
-       if (!hmacctx->desc)
-               return -EINVAL;
+       shash->tfm = hmacctx->base_hash;
+       shash->flags = crypto_shash_get_flags(hmacctx->base_hash);
        if (keylen > bs) {
-               err = crypto_shash_digest(hmacctx->desc, key, keylen,
+               err = crypto_shash_digest(shash, key, keylen,
                                          hmacctx->ipad);
                if (err)
                        goto out;
@@ -1104,13 +1200,13 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
                updated_digestsize = SHA256_DIGEST_SIZE;
        else if (digestsize == SHA384_DIGEST_SIZE)
                updated_digestsize = SHA512_DIGEST_SIZE;
-       err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad,
+       err = chcr_compute_partial_hash(shash, hmacctx->ipad,
                                        hmacctx->ipad, digestsize);
        if (err)
                goto out;
        chcr_change_order(hmacctx->ipad, updated_digestsize);
 
-       err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad,
+       err = chcr_compute_partial_hash(shash, hmacctx->opad,
                                        hmacctx->opad, digestsize);
        if (err)
                goto out;
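
SHASH_DESC_ON_STACK() declares a shash_desc plus the transform's descsize worth of state directly on the stack, which is what lets the per-tfm kmalloc'd descriptor disappear. A generic usage sketch, not tied to this driver:

	#include <crypto/hash.h>

	static int example_shash_digest(struct crypto_shash *tfm, const u8 *data,
					unsigned int len, u8 *out)
	{
		SHASH_DESC_ON_STACK(desc, tfm);
		int err;

		desc->tfm = tfm;
		desc->flags = crypto_shash_get_flags(tfm);
		err = crypto_shash_digest(desc, data, len, out);
		shash_desc_zero(desc);		/* wipe the on-stack hash state */
		return err;
	}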
@@ -1124,28 +1220,29 @@ static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 {
        struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
        struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
-       int status = 0;
        unsigned short context_size = 0;
 
-       if ((key_len == (AES_KEYSIZE_128 << 1)) ||
-           (key_len == (AES_KEYSIZE_256 << 1))) {
-               memcpy(ablkctx->key, key, key_len);
-               ablkctx->enckey_len = key_len;
-               context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4;
-               ablkctx->key_ctx_hdr =
-                       FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ?
-                                        CHCR_KEYCTX_CIPHER_KEY_SIZE_128 :
-                                        CHCR_KEYCTX_CIPHER_KEY_SIZE_256,
-                                        CHCR_KEYCTX_NO_KEY, 1,
-                                        0, context_size);
-               ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
-       } else {
+       if ((key_len != (AES_KEYSIZE_128 << 1)) &&
+           (key_len != (AES_KEYSIZE_256 << 1))) {
                crypto_tfm_set_flags((struct crypto_tfm *)tfm,
                                     CRYPTO_TFM_RES_BAD_KEY_LEN);
                ablkctx->enckey_len = 0;
-               status = -EINVAL;
+               return -EINVAL;
+
        }
-       return status;
+
+       memcpy(ablkctx->key, key, key_len);
+       ablkctx->enckey_len = key_len;
+       get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, key_len << 2);
+       context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4;
+       ablkctx->key_ctx_hdr =
+               FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ?
+                                CHCR_KEYCTX_CIPHER_KEY_SIZE_128 :
+                                CHCR_KEYCTX_CIPHER_KEY_SIZE_256,
+                                CHCR_KEYCTX_NO_KEY, 1,
+                                0, context_size);
+       ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
+       return 0;
 }
 
 static int chcr_sha_init(struct ahash_request *areq)
@@ -1155,8 +1252,9 @@ static int chcr_sha_init(struct ahash_request *areq)
        int digestsize =  crypto_ahash_digestsize(tfm);
 
        req_ctx->data_len = 0;
-       req_ctx->dummy_payload_ptr = NULL;
-       req_ctx->bfr_len = 0;
+       req_ctx->reqlen = 0;
+       req_ctx->reqbfr = req_ctx->bfr1;
+       req_ctx->skbfr = req_ctx->bfr2;
        req_ctx->skb = NULL;
        req_ctx->result = 0;
        copy_hash_init_values(req_ctx->partial_hash, digestsize);
@@ -1204,29 +1302,1184 @@ static int chcr_hmac_cra_init(struct crypto_tfm *tfm)
 
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct chcr_ahash_req_ctx));
-       hmacctx->desc = chcr_alloc_shash(digestsize);
-       if (IS_ERR(hmacctx->desc))
-               return PTR_ERR(hmacctx->desc);
+       hmacctx->base_hash = chcr_alloc_shash(digestsize);
+       if (IS_ERR(hmacctx->base_hash))
+               return PTR_ERR(hmacctx->base_hash);
        return chcr_device_init(crypto_tfm_ctx(tfm));
 }
 
-static void chcr_free_shash(struct shash_desc *desc)
-{
-       crypto_free_shash(desc->tfm);
-       kfree(desc);
-}
-
 static void chcr_hmac_cra_exit(struct crypto_tfm *tfm)
 {
        struct chcr_context *ctx = crypto_tfm_ctx(tfm);
        struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
 
-       if (hmacctx->desc) {
-               chcr_free_shash(hmacctx->desc);
-               hmacctx->desc = NULL;
+       if (hmacctx->base_hash) {
+               chcr_free_shash(hmacctx->base_hash);
+               hmacctx->base_hash = NULL;
+       }
+}
+
+static int chcr_copy_assoc(struct aead_request *req,
+                               struct chcr_aead_ctx *ctx)
+{
+       SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
+
+       skcipher_request_set_tfm(skreq, ctx->null);
+       skcipher_request_set_callback(skreq, aead_request_flags(req),
+                       NULL, NULL);
+       skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
+                       NULL);
+
+       return crypto_skcipher_encrypt(skreq);
+}
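
Copying the associated data from src to dst is delegated to an ecb(cipher_null) skcipher, which the crypto API implements as a scatterlist-to-scatterlist copy, avoiding an open-coded scatterwalk here. Where ctx->null comes from is not shown in this hunk; a hedged allocation sketch:

	/* Sketch: grab the shared null skcipher used purely as an sg copy engine. */
	struct crypto_skcipher *null;

	null = crypto_get_default_null_skcipher();	/* refcounted ecb(cipher_null) */
	if (IS_ERR(null))
		return PTR_ERR(null);
	aeadctx->null = null;	/* released later with crypto_put_default_null_skcipher() */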
+
+static unsigned char get_hmac(unsigned int authsize)
+{
+       switch (authsize) {
+       case ICV_8:
+               return CHCR_SCMD_HMAC_CTRL_PL1;
+       case ICV_10:
+               return CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366;
+       case ICV_12:
+               return CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+       }
+       return CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+}
+
+
+static struct sk_buff *create_authenc_wr(struct aead_request *req,
+                                        unsigned short qid,
+                                        int size,
+                                        unsigned short op_type)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_context *ctx = crypto_aead_ctx(tfm);
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx);
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+       struct sk_buff *skb = NULL;
+       struct chcr_wr *chcr_req;
+       struct cpl_rx_phys_dsgl *phys_cpl;
+       struct phys_sge_parm sg_param;
+       struct scatterlist *src, *dst;
+       struct scatterlist src_sg[2], dst_sg[2];
+       unsigned int frags = 0, transhdr_len;
+       unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0;
+       unsigned int   kctx_len = 0;
+       unsigned short stop_offset = 0;
+       unsigned int  assoclen = req->assoclen;
+       unsigned int  authsize = crypto_aead_authsize(tfm);
+       int err = 0;
+       int null = 0;
+       gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+               GFP_ATOMIC;
+
+       if (aeadctx->enckey_len == 0 || (req->cryptlen == 0))
+               goto err;
+
+       if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
+               goto err;
+
+       if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0)
+               goto err;
+       src = scatterwalk_ffwd(src_sg, req->src, req->assoclen);
+       dst = src;
+       if (req->src != req->dst) {
+               err = chcr_copy_assoc(req, aeadctx);
+               if (err)
+                       return ERR_PTR(err);
+               dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen);
+       }
+       if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) {
+               null = 1;
+               assoclen = 0;
+       }
+       reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen +
+                                            (op_type ? -authsize : authsize));
+       if (reqctx->dst_nents <= 0) {
+               pr_err("AUTHENC:Invalid Destination sg entries\n");
+               goto err;
+       }
+       dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
+       kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4)
+               - sizeof(chcr_req->key_ctx);
+       transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
+       if (!skb)
+               goto err;
+
+       /* LLD is going to write the sge hdr. */
+       skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+
+       /* Write WR */
+       chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len);
+       memset(chcr_req, 0, transhdr_len);
+
+       stop_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
+
+       /*
+        * Input order is AAD, IV and payload, with the IV treated as part
+        * of the authenticated data.  All other fields are filled in
+        * according to the hardware spec.
+        */
+       chcr_req->sec_cpl.op_ivinsrtofst =
+               FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2,
+                                      (ivsize ? (assoclen + 1) : 0));
+       chcr_req->sec_cpl.pldlen = htonl(assoclen + ivsize + req->cryptlen);
+       chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
+                                       assoclen ? 1 : 0, assoclen,
+                                       assoclen + ivsize + 1,
+                                       (stop_offset & 0x1F0) >> 4);
+       chcr_req->sec_cpl.cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(
+                                       stop_offset & 0xF,
+                                       null ? 0 : assoclen + ivsize + 1,
+                                       stop_offset, stop_offset);
+       chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type,
+                                       (op_type == CHCR_ENCRYPT_OP) ? 1 : 0,
+                                       CHCR_SCMD_CIPHER_MODE_AES_CBC,
+                                       actx->auth_mode, aeadctx->hmac_ctrl,
+                                       ivsize >> 1);
+       chcr_req->sec_cpl.ivgen_hdrlen =  FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1,
+                                        0, 1, dst_size);
+
+       chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
+       if (op_type == CHCR_ENCRYPT_OP)
+               memcpy(chcr_req->key_ctx.key, aeadctx->key,
+                      aeadctx->enckey_len);
+       else
+               memcpy(chcr_req->key_ctx.key, actx->dec_rrkey,
+                      aeadctx->enckey_len);
+
+       memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) <<
+                                       4), actx->h_iopad, kctx_len -
+                               (DIV_ROUND_UP(aeadctx->enckey_len, 16) << 4));
+
+       phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
+       sg_param.nents = reqctx->dst_nents;
+       sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
+       sg_param.qid = qid;
+       sg_param.align = 0;
+       if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst,
+                                 &sg_param))
+               goto dstmap_fail;
+
+       skb_set_transport_header(skb, transhdr_len);
+
+       if (assoclen) {
+               /* AAD buffer in */
+               write_sg_to_skb(skb, &frags, req->src, assoclen);
+       }
+       write_buffer_to_skb(skb, &frags, req->iv, ivsize);
+       write_sg_to_skb(skb, &frags, src, req->cryptlen);
+       create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
+                  sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+       reqctx->skb = skb;
+       skb_get(skb);
+
+       return skb;
+dstmap_fail:
+       /* ivmap_fail: */
+       kfree_skb(skb);
+err:
+       return ERR_PTR(-EINVAL);
+}
+
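+/*
+ * Zero the leading bytes of the destination buffer.  Used for the
+ * empty-payload GCM case, where the hardware cannot handle a null
+ * payload and a dummy block is sent instead.
+ */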
+static void aes_gcm_empty_pld_pad(struct scatterlist *sg,
+                                 unsigned short offset)
+{
+       struct page *spage;
+       unsigned char *addr;
+
+       spage = sg_page(sg);
+       get_page(spage); /* keep the page until the NIC is done with it */
+#ifdef KMAP_ATOMIC_ARGS
+       addr = kmap_atomic(spage, KM_SOFTIRQ0);
+#else
+       addr = kmap_atomic(spage);
+#endif
+       memset(addr + sg->offset, 0, offset + 1);
+
+       kunmap_atomic(addr);
+}
+
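+/*
+ * Encode msglen into the last csize bytes of block in big-endian order,
+ * as required for the length field of the CCM B0 block (RFC 3610).
+ */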
+static int set_msg_len(u8 *block, unsigned int msglen, int csize)
+{
+       __be32 data;
+
+       memset(block, 0, csize);
+       block += csize;
+
+       if (csize >= 4)
+               csize = 4;
+       else if (msglen > (unsigned int)(1 << (8 * csize)))
+               return -EOVERFLOW;
+
+       data = cpu_to_be32(msglen);
+       memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+       return 0;
+}
+
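+/*
+ * Construct the CCM B0 block in the request scratch pad: the flags byte
+ * (length-field size L' in the low bits, tag length M in bits 3-5, the
+ * Adata bit when AAD is present), followed by the nonce and the encoded
+ * message length.
+ */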
+static void generate_b0(struct aead_request *req,
+                       struct chcr_aead_ctx *aeadctx,
+                       unsigned short op_type)
+{
+       unsigned int l, lp, m;
+       int rc;
+       struct crypto_aead *aead = crypto_aead_reqtfm(req);
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+       u8 *b0 = reqctx->scratch_pad;
+
+       m = crypto_aead_authsize(aead);
+
+       memcpy(b0, reqctx->iv, 16);
+
+       lp = b0[0];
+       l = lp + 1;
+
+       /* set m, bits 3-5 */
+       *b0 |= (8 * ((m - 2) / 2));
+
+       /* set adata, bit 6, if associated data is used */
+       if (req->assoclen)
+               *b0 |= 64;
+       rc = set_msg_len(b0 + 16 - l,
+                        (op_type == CHCR_DECRYPT_OP) ?
+                        req->cryptlen - m : req->cryptlen, l);
+}
+
+static inline int crypto_ccm_check_iv(const u8 *iv)
+{
+       /* 2 <= L <= 8, so 1 <= L' <= 7. */
+       if (iv[0] < 1 || iv[0] > 7)
+               return -EINVAL;
+
+       return 0;
+}
+
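+/*
+ * Lay out the CCM nonce and AAD length field.  For RFC 4309 the 16-byte
+ * IV is built as L' = 3 | 3-byte salt | 8-byte IV | zeroed counter, and
+ * the AAD length stored after B0 excludes the trailing 8 IV bytes carried
+ * in the associated data.
+ */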
+static int ccm_format_packet(struct aead_request *req,
+                            struct chcr_aead_ctx *aeadctx,
+                            unsigned int sub_type,
+                            unsigned short op_type)
+{
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+       int rc = 0;
+
+       if (req->assoclen > T5_MAX_AAD_SIZE) {
+               pr_err("CCM: Unsupported AAD data. It should be < %d\n",
+                      T5_MAX_AAD_SIZE);
+               return -EINVAL;
+       }
+       if (sub_type == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) {
+               reqctx->iv[0] = 3;
+               memcpy(reqctx->iv + 1, &aeadctx->salt[0], 3);
+               memcpy(reqctx->iv + 4, req->iv, 8);
+               memset(reqctx->iv + 12, 0, 4);
+               *((unsigned short *)(reqctx->scratch_pad + 16)) =
+                       htons(req->assoclen - 8);
+       } else {
+               memcpy(reqctx->iv, req->iv, 16);
+               *((unsigned short *)(reqctx->scratch_pad + 16)) =
+                       htons(req->assoclen);
+       }
+       generate_b0(req, aeadctx, op_type);
+       /* zero the ctr value */
+       memset(reqctx->iv + 15 - reqctx->iv[0], 0, reqctx->iv[0] + 1);
+       return rc;
+}
+
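+/*
+ * Fill the SEC CPL for CCM.  ccm_xtra accounts for the B0 block and the
+ * AAD length field sent ahead of the real AAD, so every offset and the
+ * payload length are adjusted by it.
+ */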
+static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl,
+                                 unsigned int dst_size,
+                                 struct aead_request *req,
+                                 unsigned short op_type,
+                                         struct chcr_context *chcrctx)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       unsigned int ivsize = AES_BLOCK_SIZE;
+       unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM;
+       unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC;
+       unsigned int c_id = chcrctx->dev->tx_channel_id;
+       unsigned int ccm_xtra;
+       unsigned char tag_offset = 0, auth_offset = 0;
+       unsigned char hmac_ctrl = get_hmac(crypto_aead_authsize(tfm));
+       unsigned int assoclen;
+
+       if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309)
+               assoclen = req->assoclen - 8;
+       else
+               assoclen = req->assoclen;
+       ccm_xtra = CCM_B0_SIZE +
+               ((assoclen) ? CCM_AAD_FIELD_SIZE : 0);
+
+       auth_offset = req->cryptlen ?
+               (assoclen + ivsize + 1 + ccm_xtra) : 0;
+       if (op_type == CHCR_DECRYPT_OP) {
+               if (crypto_aead_authsize(tfm) != req->cryptlen)
+                       tag_offset = crypto_aead_authsize(tfm);
+               else
+                       auth_offset = 0;
+       }
+
+       sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id,
+                                        2, (ivsize ?  (assoclen + 1) :  0) +
+                                        ccm_xtra);
+       sec_cpl->pldlen =
+               htonl(assoclen + ivsize + req->cryptlen + ccm_xtra);
+       /* For CCM, B0 is always present, so AAD always starts at 1 */
+       sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
+                                       1, assoclen + ccm_xtra, assoclen
+                                       + ivsize + 1 + ccm_xtra, 0);
+
+       sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0,
+                                       auth_offset, tag_offset,
+                                       (op_type == CHCR_ENCRYPT_OP) ? 0 :
+                                       crypto_aead_authsize(tfm));
+       sec_cpl->seqno_numivs =  FILL_SEC_CPL_SCMD0_SEQNO(op_type,
+                                       (op_type == CHCR_ENCRYPT_OP) ? 0 : 1,
+                                       cipher_mode, mac_mode, hmac_ctrl,
+                                       ivsize >> 1);
+
+       sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, 0,
+                                       1, dst_size);
+}
+
+int aead_ccm_validate_input(unsigned short op_type,
+                           struct aead_request *req,
+                           struct chcr_aead_ctx *aeadctx,
+                           unsigned int sub_type)
+{
+       if (sub_type != CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) {
+               if (crypto_ccm_check_iv(req->iv)) {
+                       pr_err("CCM: IV check fails\n");
+                       return -EINVAL;
+               }
+       } else {
+               if (req->assoclen != 16 && req->assoclen != 20) {
+                       pr_err("RFC4309: Invalid AAD length %d\n",
+                              req->assoclen);
+                       return -EINVAL;
+               }
+       }
+       if (aeadctx->enckey_len == 0) {
+               pr_err("CCM: Encryption key not set\n");
+               return -EINVAL;
+       }
+       return 0;
+}
+
+unsigned int fill_aead_req_fields(struct sk_buff *skb,
+                                 struct aead_request *req,
+                                 struct scatterlist *src,
+                                 unsigned int ivsize,
+                                 struct chcr_aead_ctx *aeadctx)
+{
+       unsigned int frags = 0;
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+       /* B0 and the AAD length field (if AAD is present) */
+
+       write_buffer_to_skb(skb, &frags, reqctx->scratch_pad, CCM_B0_SIZE +
+                               (req->assoclen ?  CCM_AAD_FIELD_SIZE : 0));
+       if (req->assoclen) {
+               if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309)
+                       write_sg_to_skb(skb, &frags, req->src,
+                                       req->assoclen - 8);
+               else
+                       write_sg_to_skb(skb, &frags, req->src, req->assoclen);
+       }
+       write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
+       if (req->cryptlen)
+               write_sg_to_skb(skb, &frags, src, req->cryptlen);
+
+       return frags;
+}
+
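+/*
+ * Build the work request for CCM and RFC 4309.  The AES key is copied
+ * twice into the key context, and the B0/AAD-length header prepared by
+ * ccm_format_packet is sent ahead of the AAD and payload.
+ */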
+static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
+                                         unsigned short qid,
+                                         int size,
+                                         unsigned short op_type)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_context *ctx = crypto_aead_ctx(tfm);
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+       struct sk_buff *skb = NULL;
+       struct chcr_wr *chcr_req;
+       struct cpl_rx_phys_dsgl *phys_cpl;
+       struct phys_sge_parm sg_param;
+       struct scatterlist *src, *dst;
+       struct scatterlist src_sg[2], dst_sg[2];
+       unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE;
+       unsigned int dst_size = 0, kctx_len;
+       unsigned int sub_type;
+       unsigned int authsize = crypto_aead_authsize(tfm);
+       int err = 0;
+       gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+               GFP_ATOMIC;
+
+       if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
+               goto err;
+
+       if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0)
+               goto err;
+       sub_type = get_aead_subtype(tfm);
+       src = scatterwalk_ffwd(src_sg, req->src, req->assoclen);
+       dst = src;
+       if (req->src != req->dst) {
+               err = chcr_copy_assoc(req, aeadctx);
+               if (err) {
+                       pr_err("AAD copy to destination buffer fails\n");
+                       return ERR_PTR(err);
+               }
+               dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen);
+       }
+       reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen +
+                                            (op_type ? -authsize : authsize));
+       if (reqctx->dst_nents <= 0) {
+               pr_err("CCM: Invalid destination sg entries\n");
+               goto err;
+       }
+
+       if (aead_ccm_validate_input(op_type, req, aeadctx, sub_type))
+               goto err;
+
+       dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
+       kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) * 2;
+       transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),  flags);
+
+       if (!skb)
+               goto err;
+
+       skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+
+       chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len);
+       memset(chcr_req, 0, transhdr_len);
+
+       fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, op_type, ctx);
+
+       chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
+       memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len);
+       memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) *
+                                       16), aeadctx->key, aeadctx->enckey_len);
+
+       phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
+       if (ccm_format_packet(req, aeadctx, sub_type, op_type))
+               goto dstmap_fail;
+
+       sg_param.nents = reqctx->dst_nents;
+       sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
+       sg_param.qid = qid;
+       sg_param.align = 0;
+       if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst,
+                                 &sg_param))
+               goto dstmap_fail;
+
+       skb_set_transport_header(skb, transhdr_len);
+       frags = fill_aead_req_fields(skb, req, src, ivsize, aeadctx);
+       create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1,
+                   sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+       reqctx->skb = skb;
+       skb_get(skb);
+       return skb;
+dstmap_fail:
+       kfree_skb(skb);
+       skb = NULL;
+err:
+       return ERR_PTR(-EINVAL);
+}
+
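+/*
+ * Build the work request for GCM and RFC 4106.  The key context carries
+ * the AES key followed by the GHASH subkey H, and the 16-byte IV is
+ * terminated with an initial counter value of 1.
+ */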
+static struct sk_buff *create_gcm_wr(struct aead_request *req,
+                                    unsigned short qid,
+                                    int size,
+                                    unsigned short op_type)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_context *ctx = crypto_aead_ctx(tfm);
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+       struct sk_buff *skb = NULL;
+       struct chcr_wr *chcr_req;
+       struct cpl_rx_phys_dsgl *phys_cpl;
+       struct phys_sge_parm sg_param;
+       struct scatterlist *src, *dst;
+       struct scatterlist src_sg[2], dst_sg[2];
+       unsigned int frags = 0, transhdr_len;
+       unsigned int ivsize = AES_BLOCK_SIZE;
+       unsigned int dst_size = 0, kctx_len;
+       unsigned char tag_offset = 0;
+       unsigned int crypt_len = 0;
+       unsigned int authsize = crypto_aead_authsize(tfm);
+       unsigned char hmac_ctrl = get_hmac(authsize);
+       int err = 0;
+       gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+               GFP_ATOMIC;
+
+       /* validate key size */
+       if (aeadctx->enckey_len == 0)
+               goto err;
+
+       if (op_type && req->cryptlen < crypto_aead_authsize(tfm))
+               goto err;
+
+       if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0)
+               goto err;
+
+       src = scatterwalk_ffwd(src_sg, req->src, req->assoclen);
+       dst = src;
+       if (req->src != req->dst) {
+               err = chcr_copy_assoc(req, aeadctx);
+               if (err)
+                       return  ERR_PTR(err);
+               dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen);
+       }
+
+       if (!req->cryptlen)
+               /* A null payload is not supported by the hardware, so
+                * software sends one block instead.
+                */
+               crypt_len = AES_BLOCK_SIZE;
+       else
+               crypt_len = req->cryptlen;
+       reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen +
+                                            (op_type ? -authsize : authsize));
+       if (reqctx->dst_nents <= 0) {
+               pr_err("GCM: Invalid destination sg entries\n");
+               goto err;
+       }
+
+       dst_size = get_space_for_phys_dsgl(reqctx->dst_nents);
+       kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) +
+               AEAD_H_SIZE;
+       transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size);
+       skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags);
+       if (!skb)
+               goto err;
+
+       /* NIC driver is going to write the sge hdr. */
+       skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+
+       chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len);
+       memset(chcr_req, 0, transhdr_len);
+
+       if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106)
+               req->assoclen -= 8;
+
+       tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize;
+       chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(
+                                       ctx->dev->tx_channel_id, 2, (ivsize ?
+                                       (req->assoclen + 1) : 0));
+       chcr_req->sec_cpl.pldlen = htonl(req->assoclen + ivsize + crypt_len);
+       chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
+                                       req->assoclen ? 1 : 0, req->assoclen,
+                                       req->assoclen + ivsize + 1, 0);
+       if (req->cryptlen) {
+               chcr_req->sec_cpl.cipherstop_lo_authinsert =
+                       FILL_SEC_CPL_AUTHINSERT(0, req->assoclen + ivsize + 1,
+                                               tag_offset, tag_offset);
+               chcr_req->sec_cpl.seqno_numivs =
+                       FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type ==
+                                       CHCR_ENCRYPT_OP) ? 1 : 0,
+                                       CHCR_SCMD_CIPHER_MODE_AES_GCM,
+                                       CHCR_SCMD_AUTH_MODE_GHASH, hmac_ctrl,
+                                       ivsize >> 1);
+       } else {
+               chcr_req->sec_cpl.cipherstop_lo_authinsert =
+                       FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0);
+               chcr_req->sec_cpl.seqno_numivs =
+                       FILL_SEC_CPL_SCMD0_SEQNO(op_type,
+                                       (op_type ==  CHCR_ENCRYPT_OP) ?
+                                       1 : 0, CHCR_SCMD_CIPHER_MODE_AES_CBC,
+                                       0, 0, ivsize >> 1);
+       }
+       chcr_req->sec_cpl.ivgen_hdrlen =  FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1,
+                                       0, 1, dst_size);
+       chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr;
+       memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len);
+       memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) *
+                               16), GCM_CTX(aeadctx)->ghash_h, AEAD_H_SIZE);
+
+       /* Prepare the 16-byte IV: SALT | IV | 0x00000001 for RFC 4106,
+        * otherwise IV | 0x00000001.
+        */
+       if (get_aead_subtype(tfm) ==
+           CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) {
+               memcpy(reqctx->iv, aeadctx->salt, 4);
+               memcpy(reqctx->iv + 4, req->iv, 8);
+       } else {
+               memcpy(reqctx->iv, req->iv, 12);
+       }
+       *((unsigned int *)(reqctx->iv + 12)) = htonl(0x01);
+
+       phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len);
+       sg_param.nents = reqctx->dst_nents;
+       sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize);
+       sg_param.qid = qid;
+       sg_param.align = 0;
+       if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst,
+                                 &sg_param))
+               goto dstmap_fail;
+
+       skb_set_transport_header(skb, transhdr_len);
+
+       write_sg_to_skb(skb, &frags, req->src, req->assoclen);
+
+       write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize);
+
+       if (req->cryptlen) {
+               write_sg_to_skb(skb, &frags, src, req->cryptlen);
+       } else {
+               aes_gcm_empty_pld_pad(req->dst, authsize - 1);
+               write_sg_to_skb(skb, &frags, dst, crypt_len);
+       }
+
+       create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1,
+                       sizeof(struct cpl_rx_phys_dsgl) + dst_size);
+       reqctx->skb = skb;
+       skb_get(skb);
+       return skb;
+
+dstmap_fail:
+       /* ivmap_fail: */
+       kfree_skb(skb);
+       skb = NULL;
+err:
+       return ERR_PTR(-EINVAL);
+}
+
+static int chcr_aead_cra_init(struct crypto_aead *tfm)
+{
+       struct chcr_context *ctx = crypto_aead_ctx(tfm);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+
+       crypto_aead_set_reqsize(tfm, sizeof(struct chcr_aead_reqctx));
+       aeadctx->null = crypto_get_default_null_skcipher();
+       if (IS_ERR(aeadctx->null))
+               return PTR_ERR(aeadctx->null);
+       return chcr_device_init(ctx);
+}
+
+static void chcr_aead_cra_exit(struct crypto_aead *tfm)
+{
+       crypto_put_default_null_skcipher();
+}
+
+static int chcr_authenc_null_setauthsize(struct crypto_aead *tfm,
+                                       unsigned int authsize)
+{
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm));
+
+       aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NOP;
+       aeadctx->mayverify = VERIFY_HW;
+       return 0;
+}
+
+static int chcr_authenc_setauthsize(struct crypto_aead *tfm,
+                                   unsigned int authsize)
+{
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm));
+       u32 maxauth = crypto_aead_maxauthsize(tfm);
+
+       /* The SHA1 authsize used in IPsec is 12 rather than 10, i.e.
+        * maxauthsize / 2 does not hold for SHA1, so the authsize == 12
+        * check must come before the authsize == (maxauth >> 1) check.
+        */
+       if (authsize == ICV_4) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1;
+               aeadctx->mayverify = VERIFY_HW;
+       } else if (authsize == ICV_6) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL2;
+               aeadctx->mayverify = VERIFY_HW;
+       } else if (authsize == ICV_10) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366;
+               aeadctx->mayverify = VERIFY_HW;
+       } else if (authsize == ICV_12) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+               aeadctx->mayverify = VERIFY_HW;
+       } else if (authsize == ICV_14) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3;
+               aeadctx->mayverify = VERIFY_HW;
+       } else if (authsize == (maxauth >> 1)) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2;
+               aeadctx->mayverify = VERIFY_HW;
+       } else if (authsize == maxauth) {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+               aeadctx->mayverify = VERIFY_HW;
+       } else {
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+               aeadctx->mayverify = VERIFY_SW;
+       }
+       return 0;
+}
+
+static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm));
+
+       switch (authsize) {
+       case ICV_4:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_8:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_12:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_14:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_16:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_13:
+       case ICV_15:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+               aeadctx->mayverify = VERIFY_SW;
+               break;
+       default:
+               crypto_tfm_set_flags((struct crypto_tfm *)tfm,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm,
+                                         unsigned int authsize)
+{
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm));
+
+       switch (authsize) {
+       case ICV_8:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_12:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_16:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       default:
+               crypto_tfm_set_flags((struct crypto_tfm *)tfm,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int chcr_ccm_setauthsize(struct crypto_aead *tfm,
+                               unsigned int authsize)
+{
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm));
+
+       switch (authsize) {
+       case ICV_4:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_6:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL2;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_8:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_10:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_12:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_14:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       case ICV_16:
+               aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+               aeadctx->mayverify = VERIFY_HW;
+               break;
+       default:
+               crypto_tfm_set_flags((struct crypto_tfm *)tfm,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int chcr_aead_ccm_setkey(struct crypto_aead *aead,
+                               const u8 *key,
+                               unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_aead_ctx(aead);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       unsigned char ck_size, mk_size;
+       int key_ctx_size = 0;
+
+       memcpy(aeadctx->key, key, keylen);
+       aeadctx->enckey_len = keylen;
+       key_ctx_size = sizeof(struct _key_ctx) +
+               ((DIV_ROUND_UP(keylen, 16)) << 4)  * 2;
+       if (keylen == AES_KEYSIZE_128) {
+               mk_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+       } else if (keylen == AES_KEYSIZE_192) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+               mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_192;
+       } else if (keylen == AES_KEYSIZE_256) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+               mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+       } else {
+               crypto_tfm_set_flags((struct crypto_tfm *)aead,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               aeadctx->enckey_len = 0;
+               return  -EINVAL;
+       }
+       aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, mk_size, 0, 0,
+                                               key_ctx_size >> 4);
+       return 0;
+}
+
+static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key,
+                                   unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_aead_ctx(aead);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+
+       if (keylen < 3) {
+               crypto_tfm_set_flags((struct crypto_tfm *)aead,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               aeadctx->enckey_len = 0;
+               return  -EINVAL;
+       }
+       keylen -= 3;
+       memcpy(aeadctx->salt, key + keylen, 3);
+       return chcr_aead_ccm_setkey(aead, key, keylen);
+}
+
+static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key,
+                          unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_aead_ctx(aead);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       struct chcr_gcm_ctx *gctx = GCM_CTX(aeadctx);
+       struct blkcipher_desc h_desc;
+       struct scatterlist src[1];
+       unsigned int ck_size;
+       int ret = 0, key_ctx_size = 0;
+
+       if (get_aead_subtype(aead) ==
+           CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) {
+               keylen -= 4;  /* nonce/salt is present in the last 4 bytes */
+               memcpy(aeadctx->salt, key + keylen, 4);
+       }
+       if (keylen == AES_KEYSIZE_128) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+       } else if (keylen == AES_KEYSIZE_192) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+       } else if (keylen == AES_KEYSIZE_256) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+       } else {
+               crypto_tfm_set_flags((struct crypto_tfm *)aead,
+                                    CRYPTO_TFM_RES_BAD_KEY_LEN);
+               aeadctx->enckey_len = 0;
+               pr_err("GCM: Invalid key length %d\n", keylen);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       memcpy(aeadctx->key, key, keylen);
+       aeadctx->enckey_len = keylen;
+       key_ctx_size = sizeof(struct _key_ctx) +
+               ((DIV_ROUND_UP(keylen, 16)) << 4) +
+               AEAD_H_SIZE;
+       aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
+                                               CHCR_KEYCTX_MAC_KEY_SIZE_128,
+                                               0, 0,
+                                               key_ctx_size >> 4);
+       /* Calculate H = CIPH(K, 0 repeated 16 times) using a synchronous
+        * AES blkcipher; the result is placed in the key context.
+        */
+       h_desc.tfm = crypto_alloc_blkcipher("cbc(aes-generic)", 0, 0);
+       if (IS_ERR(h_desc.tfm)) {
+               aeadctx->enckey_len = 0;
+               ret = -ENOMEM;
+               goto out;
+       }
+       h_desc.flags = 0;
+       ret = crypto_blkcipher_setkey(h_desc.tfm, key, keylen);
+       if (ret) {
+               aeadctx->enckey_len = 0;
+               goto out1;
+       }
+       memset(gctx->ghash_h, 0, AEAD_H_SIZE);
+       sg_init_one(&src[0], gctx->ghash_h, AEAD_H_SIZE);
+       ret = crypto_blkcipher_encrypt(&h_desc, &src[0], &src[0], AEAD_H_SIZE);
+
+out1:
+       crypto_free_blkcipher(h_desc.tfm);
+out:
+       return ret;
+}
+
+static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key,
+                                  unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_aead_ctx(authenc);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx);
+       /* holds both the authentication and cipher keys */
+       struct crypto_authenc_keys keys;
+       unsigned int bs;
+       unsigned int max_authsize = crypto_aead_alg(authenc)->maxauthsize;
+       int err = 0, i, key_ctx_len = 0;
+       unsigned char ck_size = 0;
+       unsigned char pad[CHCR_HASH_MAX_BLOCK_SIZE_128] = { 0 };
+       struct crypto_shash *base_hash = NULL;
+       struct algo_param param;
+       int align;
+       u8 *o_ptr = NULL;
+
+       if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) {
+               crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               goto out;
+       }
+
+       if (get_alg_config(&param, max_authsize)) {
+               pr_err("chcr : Unsupported digest size\n");
+               goto out;
+       }
+       if (keys.enckeylen == AES_KEYSIZE_128) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+       } else if (keys.enckeylen == AES_KEYSIZE_192) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+       } else if (keys.enckeylen == AES_KEYSIZE_256) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+       } else {
+               pr_err("chcr : Unsupported cipher key\n");
+               goto out;
+       }
+
+       /* Copy only the encryption key.  The auth key is used here to
+        * generate h(ipad) and h(opad), so it is not needed again; a key
+        * longer than the block size is first digested down to the hash
+        * digest size.
+        */
+       memcpy(aeadctx->key, keys.enckey, keys.enckeylen);
+       aeadctx->enckey_len = keys.enckeylen;
+       get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
+                           aeadctx->enckey_len << 3);
+
+       base_hash  = chcr_alloc_shash(max_authsize);
+       if (IS_ERR(base_hash)) {
+               pr_err("chcr : Base driver cannot be loaded\n");
+               goto out;
        }
+       {
+               SHASH_DESC_ON_STACK(shash, base_hash);
+               shash->tfm = base_hash;
+               shash->flags = crypto_shash_get_flags(base_hash);
+               bs = crypto_shash_blocksize(base_hash);
+               align = KEYCTX_ALIGN_PAD(max_authsize);
+               o_ptr =  actx->h_iopad + param.result_size + align;
+
+               if (keys.authkeylen > bs) {
+                       err = crypto_shash_digest(shash, keys.authkey,
+                                                 keys.authkeylen,
+                                                 o_ptr);
+                       if (err) {
+                               pr_err("chcr : Base driver cannot be loaded\n");
+                               goto out;
+                       }
+                       keys.authkeylen = max_authsize;
+               } else
+                       memcpy(o_ptr, keys.authkey, keys.authkeylen);
+
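+               /*
+                * HMAC precomputation: hash the block-sized key XORed
+                * with ipad (0x36...) and opad (0x5c...) and keep the
+                * partial digests in h_iopad for the key context, so the
+                * hardware can resume the HMAC from these states.
+                */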
+               /* Compute the ipad-digest*/
+               memset(pad + keys.authkeylen, 0, bs - keys.authkeylen);
+               memcpy(pad, o_ptr, keys.authkeylen);
+               for (i = 0; i < bs >> 2; i++)
+                       *((unsigned int *)pad + i) ^= IPAD_DATA;
+
+               if (chcr_compute_partial_hash(shash, pad, actx->h_iopad,
+                                             max_authsize))
+                       goto out;
+               /* Compute the opad-digest */
+               memset(pad + keys.authkeylen, 0, bs - keys.authkeylen);
+               memcpy(pad, o_ptr, keys.authkeylen);
+               for (i = 0; i < bs >> 2; i++)
+                       *((unsigned int *)pad + i) ^= OPAD_DATA;
+
+               if (chcr_compute_partial_hash(shash, pad, o_ptr, max_authsize))
+                       goto out;
+
+               /* convert the ipad and opad digest to network order */
+               chcr_change_order(actx->h_iopad, param.result_size);
+               chcr_change_order(o_ptr, param.result_size);
+               key_ctx_len = sizeof(struct _key_ctx) +
+                       ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4) +
+                       (param.result_size + align) * 2;
+               aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, param.mk_size,
+                                               0, 1, key_ctx_len >> 4);
+               actx->auth_mode = param.auth_mode;
+               chcr_free_shash(base_hash);
+
+               return 0;
+       }
+out:
+       aeadctx->enckey_len = 0;
+       if (base_hash)
+               chcr_free_shash(base_hash);
+       return -EINVAL;
 }
 
+static int chcr_aead_digest_null_setkey(struct crypto_aead *authenc,
+                                       const u8 *key, unsigned int keylen)
+{
+       struct chcr_context *ctx = crypto_aead_ctx(authenc);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
+       struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx);
+       struct crypto_authenc_keys keys;
+
+       /* keys holds both the authentication and cipher keys */
+       int key_ctx_len = 0;
+       unsigned char ck_size = 0;
+
+       if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) {
+               crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               goto out;
+       }
+       if (keys.enckeylen == AES_KEYSIZE_128) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+       } else if (keys.enckeylen == AES_KEYSIZE_192) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+       } else if (keys.enckeylen == AES_KEYSIZE_256) {
+               ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+       } else {
+               pr_err("chcr : Unsupported cipher key\n");
+               goto out;
+       }
+       memcpy(aeadctx->key, keys.enckey, keys.enckeylen);
+       aeadctx->enckey_len = keys.enckeylen;
+       get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key,
+                                   aeadctx->enckey_len << 3);
+       key_ctx_len =  sizeof(struct _key_ctx)
+               + ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4);
+
+       aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY, 0,
+                                               0, key_ctx_len >> 4);
+       actx->auth_mode = CHCR_SCMD_AUTH_MODE_NOP;
+       return 0;
+out:
+       aeadctx->enckey_len = 0;
+       return -EINVAL;
+}
+
+static int chcr_aead_encrypt(struct aead_request *req)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+
+       reqctx->verify = VERIFY_HW;
+
+       switch (get_aead_subtype(tfm)) {
+       case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC:
+       case CRYPTO_ALG_SUB_TYPE_AEAD_NULL:
+               return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
+                                   create_authenc_wr);
+       case CRYPTO_ALG_SUB_TYPE_AEAD_CCM:
+       case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309:
+               return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
+                                   create_aead_ccm_wr);
+       default:
+               return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0,
+                                   create_gcm_wr);
+       }
+}
+
+static int chcr_aead_decrypt(struct aead_request *req)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm));
+       struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+       int size;
+
+       if (aeadctx->mayverify == VERIFY_SW) {
+               size = crypto_aead_maxauthsize(tfm);
+               reqctx->verify = VERIFY_SW;
+       } else {
+               size = 0;
+               reqctx->verify = VERIFY_HW;
+       }
+
+       switch (get_aead_subtype(tfm)) {
+       case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC:
+       case CRYPTO_ALG_SUB_TYPE_AEAD_NULL:
+               return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
+                                   create_authenc_wr);
+       case CRYPTO_ALG_SUB_TYPE_AEAD_CCM:
+       case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309:
+               return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
+                                   create_aead_ccm_wr);
+       default:
+               return chcr_aead_op(req, CHCR_DECRYPT_OP, size,
+                                   create_gcm_wr);
+       }
+}
+
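+/*
+ * Common submission path for all AEAD modes: build the work request skb
+ * via the mode-specific builder, bind it to this context's tx channel
+ * and queue, and hand it to the LLD.  Completion is asynchronous, so
+ * -EINPROGRESS is returned on success.
+ */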
+static int chcr_aead_op(struct aead_request *req,
+                         unsigned short op_type,
+                         int size,
+                         create_wr_t create_wr_fn)
+{
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chcr_context *ctx = crypto_aead_ctx(tfm);
+       struct uld_ctx *u_ctx = ULD_CTX(ctx);
+       struct sk_buff *skb;
+
+       if (ctx && !ctx->dev) {
+               pr_err("chcr : %s : No crypto device.\n", __func__);
+               return -ENXIO;
+       }
+       if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+                                  ctx->tx_channel_id)) {
+               if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+                       return -EBUSY;
+       }
+
+       /* Form a WR from req */
+       skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[ctx->tx_channel_id], size,
+                          op_type);
+
+       if (IS_ERR(skb) || skb == NULL) {
+               pr_err("chcr : %s : failed to form WR. No memory\n", __func__);
+               return PTR_ERR(skb);
+       }
+
+       skb->dev = u_ctx->lldi.ports[0];
+       set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+       chcr_send_wr(skb);
+       return -EINPROGRESS;
+}
 static struct chcr_alg_template driver_algs[] = {
        /* AES-CBC */
        {
@@ -1234,7 +2487,7 @@ static struct chcr_alg_template driver_algs[] = {
                .is_registered = 0,
                .alg.crypto = {
                        .cra_name               = "cbc(aes)",
-                       .cra_driver_name        = "cbc(aes-chcr)",
+                       .cra_driver_name        = "cbc-aes-chcr",
                        .cra_priority           = CHCR_CRA_PRIORITY,
                        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
                                CRYPTO_ALG_ASYNC,
@@ -1261,7 +2514,7 @@ static struct chcr_alg_template driver_algs[] = {
                .is_registered = 0,
                .alg.crypto =   {
                        .cra_name               = "xts(aes)",
-                       .cra_driver_name        = "xts(aes-chcr)",
+                       .cra_driver_name        = "xts-aes-chcr",
                        .cra_priority           = CHCR_CRA_PRIORITY,
                        .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER |
                                CRYPTO_ALG_ASYNC,
@@ -1354,7 +2607,7 @@ static struct chcr_alg_template driver_algs[] = {
                        .halg.digestsize = SHA1_DIGEST_SIZE,
                        .halg.base = {
                                .cra_name = "hmac(sha1)",
-                               .cra_driver_name = "hmac(sha1-chcr)",
+                               .cra_driver_name = "hmac-sha1-chcr",
                                .cra_blocksize = SHA1_BLOCK_SIZE,
                        }
                }
@@ -1366,7 +2619,7 @@ static struct chcr_alg_template driver_algs[] = {
                        .halg.digestsize = SHA224_DIGEST_SIZE,
                        .halg.base = {
                                .cra_name = "hmac(sha224)",
-                               .cra_driver_name = "hmac(sha224-chcr)",
+                               .cra_driver_name = "hmac-sha224-chcr",
                                .cra_blocksize = SHA224_BLOCK_SIZE,
                        }
                }
@@ -1378,7 +2631,7 @@ static struct chcr_alg_template driver_algs[] = {
                        .halg.digestsize = SHA256_DIGEST_SIZE,
                        .halg.base = {
                                .cra_name = "hmac(sha256)",
-                               .cra_driver_name = "hmac(sha256-chcr)",
+                               .cra_driver_name = "hmac-sha256-chcr",
                                .cra_blocksize = SHA256_BLOCK_SIZE,
                        }
                }
@@ -1390,7 +2643,7 @@ static struct chcr_alg_template driver_algs[] = {
                        .halg.digestsize = SHA384_DIGEST_SIZE,
                        .halg.base = {
                                .cra_name = "hmac(sha384)",
-                               .cra_driver_name = "hmac(sha384-chcr)",
+                               .cra_driver_name = "hmac-sha384-chcr",
                                .cra_blocksize = SHA384_BLOCK_SIZE,
                        }
                }
@@ -1402,11 +2655,205 @@ static struct chcr_alg_template driver_algs[] = {
                        .halg.digestsize = SHA512_DIGEST_SIZE,
                        .halg.base = {
                                .cra_name = "hmac(sha512)",
-                               .cra_driver_name = "hmac(sha512-chcr)",
+                               .cra_driver_name = "hmac-sha512-chcr",
                                .cra_blocksize = SHA512_BLOCK_SIZE,
                        }
                }
        },
+       /* Add AEAD Algorithms */
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_GCM,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "gcm(aes)",
+                               .cra_driver_name = "gcm-aes-chcr",
+                               .cra_blocksize  = 1,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_gcm_ctx),
+                       },
+                       .ivsize = 12,
+                       .maxauthsize = GHASH_DIGEST_SIZE,
+                       .setkey = chcr_gcm_setkey,
+                       .setauthsize = chcr_gcm_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "rfc4106(gcm(aes))",
+                               .cra_driver_name = "rfc4106-gcm-aes-chcr",
+                               .cra_blocksize   = 1,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_gcm_ctx),
+
+                       },
+                       .ivsize = 8,
+                       .maxauthsize    = GHASH_DIGEST_SIZE,
+                       .setkey = chcr_gcm_setkey,
+                       .setauthsize    = chcr_4106_4309_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_CCM,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "ccm(aes)",
+                               .cra_driver_name = "ccm-aes-chcr",
+                               .cra_blocksize   = 1,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx),
+
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize    = GHASH_DIGEST_SIZE,
+                       .setkey = chcr_aead_ccm_setkey,
+                       .setauthsize    = chcr_ccm_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "rfc4309(ccm(aes))",
+                               .cra_driver_name = "rfc4309-ccm-aes-chcr",
+                               .cra_blocksize   = 1,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx),
+
+                       },
+                       .ivsize = 8,
+                       .maxauthsize    = GHASH_DIGEST_SIZE,
+                       .setkey = chcr_aead_rfc4309_setkey,
+                       .setauthsize = chcr_4106_4309_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha1),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc-hmac-sha1-cbc-aes-chcr",
+                               .cra_blocksize   = AES_BLOCK_SIZE,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_authenc_ctx),
+
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize = SHA1_DIGEST_SIZE,
+                       .setkey = chcr_authenc_setkey,
+                       .setauthsize = chcr_authenc_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha256),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc-hmac-sha256-cbc-aes-chcr",
+                               .cra_blocksize   = AES_BLOCK_SIZE,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_authenc_ctx),
+
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize    = SHA256_DIGEST_SIZE,
+                       .setkey = chcr_authenc_setkey,
+                       .setauthsize = chcr_authenc_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha224),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc-hmac-sha224-cbc-aes-chcr",
+                               .cra_blocksize   = AES_BLOCK_SIZE,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_authenc_ctx),
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize = SHA224_DIGEST_SIZE,
+                       .setkey = chcr_authenc_setkey,
+                       .setauthsize = chcr_authenc_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha384),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc-hmac-sha384-cbc-aes-chcr",
+                               .cra_blocksize   = AES_BLOCK_SIZE,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_authenc_ctx),
+
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize = SHA384_DIGEST_SIZE,
+                       .setkey = chcr_authenc_setkey,
+                       .setauthsize = chcr_authenc_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(hmac(sha512),cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc-hmac-sha512-cbc-aes-chcr",
+                               .cra_blocksize   = AES_BLOCK_SIZE,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_authenc_ctx),
+
+                       },
+                       .ivsize = AES_BLOCK_SIZE,
+                       .maxauthsize = SHA512_DIGEST_SIZE,
+                       .setkey = chcr_authenc_setkey,
+                       .setauthsize = chcr_authenc_setauthsize,
+               }
+       },
+       {
+               .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_NULL,
+               .is_registered = 0,
+               .alg.aead = {
+                       .base = {
+                               .cra_name = "authenc(digest_null,cbc(aes))",
+                               .cra_driver_name =
+                                       "authenc-digest_null-cbc-aes-chcr",
+                               .cra_blocksize   = AES_BLOCK_SIZE,
+                               .cra_ctxsize =  sizeof(struct chcr_context) +
+                                               sizeof(struct chcr_aead_ctx) +
+                                               sizeof(struct chcr_authenc_ctx),
+
+                       },
+                       .ivsize  = AES_BLOCK_SIZE,
+                       .maxauthsize = 0,
+                       .setkey  = chcr_aead_digest_null_setkey,
+                       .setauthsize = chcr_authenc_null_setauthsize,
+               }
+       },
 };
 
 /*
@@ -1424,6 +2871,11 @@ static int chcr_unregister_alg(void)
                                crypto_unregister_alg(
                                                &driver_algs[i].alg.crypto);
                        break;
+               case CRYPTO_ALG_TYPE_AEAD:
+                       if (driver_algs[i].is_registered)
+                               crypto_unregister_aead(
+                                               &driver_algs[i].alg.aead);
+                       break;
                case CRYPTO_ALG_TYPE_AHASH:
                        if (driver_algs[i].is_registered)
                                crypto_unregister_ahash(
@@ -1458,6 +2910,19 @@ static int chcr_register_alg(void)
                        err = crypto_register_alg(&driver_algs[i].alg.crypto);
                        name = driver_algs[i].alg.crypto.cra_driver_name;
                        break;
+               case CRYPTO_ALG_TYPE_AEAD:
+                       driver_algs[i].alg.aead.base.cra_priority =
+                               CHCR_CRA_PRIORITY;
+                       driver_algs[i].alg.aead.base.cra_flags =
+                               CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+                       driver_algs[i].alg.aead.encrypt = chcr_aead_encrypt;
+                       driver_algs[i].alg.aead.decrypt = chcr_aead_decrypt;
+                       driver_algs[i].alg.aead.init = chcr_aead_cra_init;
+                       driver_algs[i].alg.aead.exit = chcr_aead_cra_exit;
+                       driver_algs[i].alg.aead.base.cra_module = THIS_MODULE;
+                       err = crypto_register_aead(&driver_algs[i].alg.aead);
+                       name = driver_algs[i].alg.aead.base.cra_driver_name;
+                       break;
                case CRYPTO_ALG_TYPE_AHASH:
                        a_hash = &driver_algs[i].alg.hash;
                        a_hash->update = chcr_ahash_update;
index 199b0bb..3c7c51f 100644 (file)
 #define IPAD_DATA 0x36363636
 #define OPAD_DATA 0x5c5c5c5c
 
-#define TRANSHDR_SIZE(alignedkctx_len)\
-       (sizeof(struct ulptx_idata) +\
-        sizeof(struct ulp_txpkt) +\
-        sizeof(struct fw_crypto_lookaside_wr) +\
-        sizeof(struct cpl_tx_sec_pdu) +\
-        (alignedkctx_len))
-#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \
-       (TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\
+#define TRANSHDR_SIZE(kctx_len)\
+       (sizeof(struct chcr_wr) +\
+        kctx_len)
+#define CIPHER_TRANSHDR_SIZE(kctx_len, sge_pairs) \
+       (TRANSHDR_SIZE((kctx_len)) + (sge_pairs) +\
         sizeof(struct cpl_rx_phys_dsgl))
-#define HASH_TRANSHDR_SIZE(alignedkctx_len)\
-       (TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES)
+#define HASH_TRANSHDR_SIZE(kctx_len)\
+       (TRANSHDR_SIZE(kctx_len) + DUMMY_BYTES)
 
-#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \
-                       sizeof(struct ulp_txpkt) + \
-                       sizeof(struct ulptx_idata))
 
-#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst)      \
+#define FILL_SEC_CPL_OP_IVINSR(id, len, ofst)      \
        htonl( \
               CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \
               CPL_TX_SEC_PDU_RXCHID_V((id)) | \
               CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \
               CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \
               CPL_TX_SEC_PDU_CPLLEN_V((len)) | \
-              CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \
+              CPL_TX_SEC_PDU_PLACEHOLDER_V(0) | \
               CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst)))
 
 #define  FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \
                CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \
                CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst)))
 
-#define  FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs)  \
+#define  FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size)  \
                htonl( \
                SCMD_SEQ_NO_CTRL_V(0) | \
                SCMD_STATUS_PRESENT_V(0) | \
                SCMD_AUTH_MODE_V((amode)) | \
                SCMD_HMAC_CTRL_V((opad)) | \
                SCMD_IV_SIZE_V((size)) | \
-               SCMD_NUM_IVS_V((nivs)))
+               SCMD_NUM_IVS_V(0))
 
 #define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \
                SCMD_ENB_DBGID_V(0) | \
@@ -264,13 +258,15 @@ enum {
  * where they indicate the size of the integrity check value (ICV)
  */
 enum {
-       AES_CCM_ICV_4   = 4,
-       AES_CCM_ICV_6   = 6,
-       AES_CCM_ICV_8   = 8,
-       AES_CCM_ICV_10  = 10,
-       AES_CCM_ICV_12  = 12,
-       AES_CCM_ICV_14  = 14,
-       AES_CCM_ICV_16 = 16
+       ICV_4  = 4,
+       ICV_6  = 6,
+       ICV_8  = 8,
+       ICV_10 = 10,
+       ICV_12 = 12,
+       ICV_13 = 13,
+       ICV_14 = 14,
+       ICV_15 = 15,
+       ICV_16 = 16
 };
 
 struct hash_op_params {
@@ -394,7 +390,7 @@ static const u8 aes_sbox[256] = {
        187, 22
 };
 
-static u32 aes_ks_subword(const u32 w)
+static inline u32 aes_ks_subword(const u32 w)
 {
        u8 bytes[4];
 
@@ -412,61 +408,4 @@ static u32 round_constant[11] = {
        0x1B000000, 0x36000000, 0x6C000000
 };
 
-/* dec_key - OUTPUT - Reverse round key
- * key - INPUT - key
- * keylength - INPUT - length of the key in number of bits
- */
-static inline void get_aes_decrypt_key(unsigned char *dec_key,
-                                      const unsigned char *key,
-                                      unsigned int keylength)
-{
-       u32 temp;
-       u32 w_ring[MAX_NK];
-       int i, j, k;
-       u8  nr, nk;
-
-       switch (keylength) {
-       case AES_KEYLENGTH_128BIT:
-               nk = KEYLENGTH_4BYTES;
-               nr = NUMBER_OF_ROUNDS_10;
-               break;
-
-       case AES_KEYLENGTH_192BIT:
-               nk = KEYLENGTH_6BYTES;
-               nr = NUMBER_OF_ROUNDS_12;
-               break;
-       case AES_KEYLENGTH_256BIT:
-               nk = KEYLENGTH_8BYTES;
-               nr = NUMBER_OF_ROUNDS_14;
-               break;
-       default:
-               return;
-       }
-       for (i = 0; i < nk; i++ )
-               w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]);
-
-       i = 0;
-       temp = w_ring[nk - 1];
-       while(i + nk < (nr + 1) * 4) {
-               if(!(i % nk)) {
-                       /* RotWord(temp) */
-                       temp = (temp << 8) | (temp >> 24);
-                       temp = aes_ks_subword(temp);
-                       temp ^= round_constant[i / nk];
-               }
-               else if (nk == 8 && (i % 4 == 0))
-                       temp = aes_ks_subword(temp);
-               w_ring[i % nk] ^= temp;
-               temp = w_ring[i % nk];
-               i++;
-       }
-       i--;
-       for (k = 0, j = i % nk; k < nk; k++) {
-               *((u32 *)dec_key + k) = htonl(w_ring[j]);
-               j--;
-               if(j < 0)
-                       j += nk;
-       }
-}
-
 #endif /* __CHCR_ALGO_H__ */
index 4d7f670..918da8e 100644 (file)
@@ -110,14 +110,12 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev,
        if (ack_err_status) {
                if (CHK_MAC_ERR_BIT(ack_err_status) ||
                    CHK_PAD_ERR_BIT(ack_err_status))
-                       error_status = -EINVAL;
+                       error_status = -EBADMSG;
        }
        /* call completion callback with failure status */
        if (req) {
-               if (!chcr_handle_resp(req, input, error_status))
-                       req->complete(req, error_status);
-               else
-                       return -EINVAL;
+               error_status = chcr_handle_resp(req, input, error_status);
+               req->complete(req, error_status);
        } else {
                pr_err("Incorrect request address from the firmware\n");
                return -EFAULT;
index 2a5c671..c7088a4 100644 (file)
 
 #define MAC_ERROR_BIT          0
 #define CHK_MAC_ERR_BIT(x)     (((x) >> MAC_ERROR_BIT) & 1)
+#define MAX_SALT                4
 
 struct uld_ctx;
 
+struct _key_ctx {
+       __be32 ctx_hdr;
+       u8 salt[MAX_SALT];
+       __be64 reserverd;
+       unsigned char key[0];
+};
+
+struct chcr_wr {
+       struct fw_crypto_lookaside_wr wreq;
+       struct ulp_txpkt ulptx;
+       struct ulptx_idata sc_imm;
+       struct cpl_tx_sec_pdu sec_cpl;
+       struct _key_ctx key_ctx;
+};
+
 struct chcr_dev {
-       /* Request submited to h/w and waiting for response. */
        spinlock_t lock_chcr_dev;
-       struct crypto_queue pending_queue;
        struct uld_ctx *u_ctx;
        unsigned char tx_channel_id;
 };
index d7d7560..d5af7d6 100644 (file)
 #ifndef __CHCR_CRYPTO_H__
 #define __CHCR_CRYPTO_H__
 
+#define GHASH_BLOCK_SIZE    16
+#define GHASH_DIGEST_SIZE   16
+
+#define CCM_B0_SIZE             16
+#define CCM_AAD_FIELD_SIZE      2
+#define T5_MAX_AAD_SIZE 512
+
+
 /* Define following if h/w is not dropping the AAD and IV data before
  * giving the processed data
  */
 #define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0
 #define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1
 
-#define CHCR_SCMD_CIPHER_MODE_NOP           0
-#define CHCR_SCMD_CIPHER_MODE_AES_CBC       1
-#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES   4
-#define CHCR_SCMD_CIPHER_MODE_AES_XTS       6
+#define CHCR_SCMD_CIPHER_MODE_NOP               0
+#define CHCR_SCMD_CIPHER_MODE_AES_CBC           1
+#define CHCR_SCMD_CIPHER_MODE_AES_GCM           2
+#define CHCR_SCMD_CIPHER_MODE_AES_CTR           3
+#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES       4
+#define CHCR_SCMD_CIPHER_MODE_AES_XTS           6
+#define CHCR_SCMD_CIPHER_MODE_AES_CCM           7
 
 #define CHCR_SCMD_AUTH_MODE_NOP             0
 #define CHCR_SCMD_AUTH_MODE_SHA1            1
 #define CHCR_SCMD_AUTH_MODE_SHA224          2
 #define CHCR_SCMD_AUTH_MODE_SHA256          3
+#define CHCR_SCMD_AUTH_MODE_GHASH           4
 #define CHCR_SCMD_AUTH_MODE_SHA512_224      5
 #define CHCR_SCMD_AUTH_MODE_SHA512_256      6
 #define CHCR_SCMD_AUTH_MODE_SHA512_384      7
 #define CHCR_SCMD_AUTH_MODE_SHA512_512      8
+#define CHCR_SCMD_AUTH_MODE_CBCMAC          9
+#define CHCR_SCMD_AUTH_MODE_CMAC            10
 
 #define CHCR_SCMD_HMAC_CTRL_NOP             0
 #define CHCR_SCMD_HMAC_CTRL_NO_TRUNC        1
+#define CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366   2
+#define CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT     3
+#define CHCR_SCMD_HMAC_CTRL_PL1                    4
+#define CHCR_SCMD_HMAC_CTRL_PL2                    5
+#define CHCR_SCMD_HMAC_CTRL_PL3                    6
+#define CHCR_SCMD_HMAC_CTRL_DIV2           7
+#define VERIFY_HW 0
+#define VERIFY_SW 1
 
 #define CHCR_SCMD_IVGEN_CTRL_HW             0
 #define CHCR_SCMD_IVGEN_CTRL_SW             1
 #define IV_IMMEDIATE            1
 #define IV_DSGL                        2
 
+#define AEAD_H_SIZE             16
+
 #define CRYPTO_ALG_SUB_TYPE_MASK            0x0f000000
 #define CRYPTO_ALG_SUB_TYPE_HASH_HMAC       0x01000000
+#define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106    0x02000000
+#define CRYPTO_ALG_SUB_TYPE_AEAD_GCM       0x03000000
+#define CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC    0x04000000
+#define CRYPTO_ALG_SUB_TYPE_AEAD_CCM        0x05000000
+#define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309    0x06000000
+#define CRYPTO_ALG_SUB_TYPE_AEAD_NULL       0x07000000
+#define CRYPTO_ALG_SUB_TYPE_CTR             0x08000000
 #define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\
                              CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
 
-#define MAX_SALT                4
 #define MAX_SCRATCH_PAD_SIZE    32
 
 #define CHCR_HASH_MAX_BLOCK_SIZE_64  64
 #define CHCR_HASH_MAX_BLOCK_SIZE_128 128
 
 /* Aligned to 128 bit boundary */
-struct _key_ctx {
-       __be32 ctx_hdr;
-       u8 salt[MAX_SALT];
-       __be64 reserverd;
-       unsigned char key[0];
-};
 
 struct ablk_ctx {
-       u8 enc;
-       unsigned int processed_len;
        __be32 key_ctx_hdr;
        unsigned int enckey_len;
-       unsigned int dst_nents;
-       struct scatterlist iv_sg;
        u8 key[CHCR_AES_MAX_KEY_LEN];
-       u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
        unsigned char ciph_mode;
+       u8 rrkey[AES_MAX_KEY_SIZE];
+};
+struct chcr_aead_reqctx {
+       struct  sk_buff *skb;
+       short int dst_nents;
+       u16 verify;
+       u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
+       unsigned char scratch_pad[MAX_SCRATCH_PAD_SIZE];
+};
+
+struct chcr_gcm_ctx {
+       u8 ghash_h[AEAD_H_SIZE];
 };
 
+struct chcr_authenc_ctx {
+       u8 dec_rrkey[AES_MAX_KEY_SIZE];
+       u8 h_iopad[2 * CHCR_HASH_MAX_DIGEST_SIZE];
+       unsigned char auth_mode;
+};
+
+struct __aead_ctx {
+       struct chcr_gcm_ctx gcm[0];
+       struct chcr_authenc_ctx authenc[0];
+};
+
+
+
+struct chcr_aead_ctx {
+       __be32 key_ctx_hdr;
+       unsigned int enckey_len;
+       struct crypto_skcipher *null;
+       u8 salt[MAX_SALT];
+       u8 key[CHCR_AES_MAX_KEY_LEN];
+       u16 hmac_ctrl;
+       u16 mayverify;
+       struct  __aead_ctx ctx[0];
+};
+
+
+
 struct hmac_ctx {
-       struct shash_desc *desc;
+       struct crypto_shash *base_hash;
        u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128];
        u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128];
 };
@@ -146,6 +203,7 @@ struct hmac_ctx {
 struct __crypto_ctx {
        struct hmac_ctx hmacctx[0];
        struct ablk_ctx ablkctx[0];
+       struct chcr_aead_ctx aeadctx[0];
 };
 
 struct chcr_context {
@@ -156,18 +214,22 @@ struct chcr_context {
 
 struct chcr_ahash_req_ctx {
        u32 result;
-       char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128];
-       u8 bfr_len;
+       u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128];
+       u8 bfr2[CHCR_HASH_MAX_BLOCK_SIZE_128];
+       u8 *reqbfr;
+       u8 *skbfr;
+       u8 reqlen;
        /* DMA the partial hash in it */
        u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE];
        u64 data_len;  /* Data len till time */
-       void *dummy_payload_ptr;
        /* SKB which is being sent to the hardware for processing */
        struct sk_buff *skb;
 };
 
 struct chcr_blkcipher_req_ctx {
        struct sk_buff *skb;
+       unsigned int dst_nents;
+       u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
 };
 
 struct chcr_alg_template {
@@ -176,16 +238,19 @@ struct chcr_alg_template {
        union {
                struct crypto_alg crypto;
                struct ahash_alg hash;
+               struct aead_alg aead;
        } alg;
 };
 
 struct chcr_req_ctx {
        union {
                struct ahash_request *ahash_req;
+               struct aead_request *aead_req;
                struct ablkcipher_request *ablk_req;
        } req;
        union {
                struct chcr_ahash_req_ctx *ahash_ctx;
+               struct chcr_aead_reqctx *reqctx;
                struct chcr_blkcipher_req_ctx *ablk_ctx;
        } ctx;
 };
@@ -195,9 +260,15 @@ struct sge_opaque_hdr {
        dma_addr_t addr[MAX_SKB_FRAGS + 1];
 };
 
-typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req,
-                                      struct chcr_context *ctx,
+typedef struct sk_buff *(*create_wr_t)(struct aead_request *req,
                                       unsigned short qid,
+                                      int size,
                                       unsigned short op_type);
 
+static int chcr_aead_op(struct aead_request *req_base,
+                         unsigned short op_type,
+                         int size,
+                         create_wr_t create_wr_fn);
+static inline int get_aead_subtype(struct crypto_aead *aead);
+
 #endif /* __CHCR_CRYPTO_H__ */
index 37dadb2..6e7a5c7 100644 (file)
@@ -375,10 +375,6 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
        if (!dma->padding_pool)
                return -ENOMEM;
 
-       dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
-       if (!dma->iv_pool)
-               return -ENOMEM;
-
        cesa->dma = dma;
 
        return 0;
index e423d33..a768da7 100644 (file)
@@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
 #define CESA_TDMA_DUMMY                                0
 #define CESA_TDMA_DATA                         1
 #define CESA_TDMA_OP                           2
-#define CESA_TDMA_IV                           3
+#define CESA_TDMA_RESULT                       3
 
 /**
  * struct mv_cesa_tdma_desc - TDMA descriptor
@@ -393,7 +393,6 @@ struct mv_cesa_dev_dma {
        struct dma_pool *op_pool;
        struct dma_pool *cache_pool;
        struct dma_pool *padding_pool;
-       struct dma_pool *iv_pool;
 };
 
 /**
@@ -839,7 +838,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
        memset(chain, 0, sizeof(*chain));
 }
 
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
                          u32 size, u32 flags, gfp_t gfp_flags);
 
 struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
index d19dc96..098871a 100644 (file)
@@ -212,7 +212,8 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req)
                struct mv_cesa_req *basereq;
 
                basereq = &creq->base;
-               memcpy(ablkreq->info, basereq->chain.last->data, ivsize);
+               memcpy(ablkreq->info, basereq->chain.last->op->ctx.blkcipher.iv,
+                      ivsize);
        } else {
                memcpy_fromio(ablkreq->info,
                              engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET,
@@ -373,8 +374,9 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
 
        /* Add output data for IV */
        ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
-       ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
-                                   ivsize, CESA_TDMA_SRC_IN_SRAM, flags);
+       ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET,
+                                   CESA_SA_DATA_SRAM_OFFSET,
+                                   CESA_TDMA_SRC_IN_SRAM, flags);
 
        if (ret)
                goto err_free_tdma;
index 77712b3..317cf02 100644 (file)
@@ -311,24 +311,40 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
        int i;
 
        digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
-       for (i = 0; i < digsize / 4; i++)
-               creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
 
-       if (creq->last_req) {
+       if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
+           (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) {
+               __le32 *data = NULL;
+
                /*
-                * Hardware's MD5 digest is in little endian format, but
-                * SHA in big endian format
+                * Result is already in the correct endianness when the SA is
+                * used
                 */
-               if (creq->algo_le) {
-                       __le32 *result = (void *)ahashreq->result;
+               data = creq->base.chain.last->op->ctx.hash.hash;
+               for (i = 0; i < digsize / 4; i++)
+                       creq->state[i] = cpu_to_le32(data[i]);
 
-                       for (i = 0; i < digsize / 4; i++)
-                               result[i] = cpu_to_le32(creq->state[i]);
-               } else {
-                       __be32 *result = (void *)ahashreq->result;
+               memcpy(ahashreq->result, data, digsize);
+       } else {
+               for (i = 0; i < digsize / 4; i++)
+                       creq->state[i] = readl_relaxed(engine->regs +
+                                                      CESA_IVDIG(i));
+               if (creq->last_req) {
+                       /*
+                        * Hardware's MD5 digest is in little endian format, but
+                        * SHA in big endian format
+                        */
+                       if (creq->algo_le) {
+                               __le32 *result = (void *)ahashreq->result;
+
+                               for (i = 0; i < digsize / 4; i++)
+                                       result[i] = cpu_to_le32(creq->state[i]);
+                       } else {
+                               __be32 *result = (void *)ahashreq->result;
 
-                       for (i = 0; i < digsize / 4; i++)
-                               result[i] = cpu_to_be32(creq->state[i]);
+                               for (i = 0; i < digsize / 4; i++)
+                                       result[i] = cpu_to_be32(creq->state[i]);
+                       }
                }
        }
 
@@ -503,6 +519,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
                                                CESA_SA_DESC_CFG_LAST_FRAG,
                                      CESA_SA_DESC_CFG_FRAG_MSK);
 
+               ret = mv_cesa_dma_add_result_op(chain,
+                                               CESA_SA_CFG_SRAM_OFFSET,
+                                               CESA_SA_DATA_SRAM_OFFSET,
+                                               CESA_TDMA_SRC_IN_SRAM, flags);
+               if (ret)
+                       return ERR_PTR(-ENOMEM);
                return op;
        }
 
@@ -563,6 +585,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
        struct mv_cesa_op_ctx *op = NULL;
        unsigned int frag_len;
        int ret;
+       u32 type;
 
        basereq->chain.first = NULL;
        basereq->chain.last = NULL;
@@ -634,7 +657,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
                goto err_free_tdma;
        }
 
-       if (op) {
+       /*
+        * If results are copied via DMA, this means that this
+        * request can be directly processed by the engine,
+        * without partial updates. So we can chain it at the
+        * DMA level with other requests.
+        */
+       type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
+
+       if (op && type != CESA_TDMA_RESULT) {
                /* Add dummy desc to wait for crypto operation end */
                ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
                if (ret)
@@ -647,8 +678,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
        else
                creq->cache_ptr = 0;
 
-       basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
-                                      CESA_TDMA_BREAK_CHAIN);
+       basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
+
+       if (type != CESA_TDMA_RESULT)
+               basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
 
        return 0;
 
index 9fd7a5f..4416b88 100644 (file)
@@ -69,9 +69,6 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
                if (type == CESA_TDMA_OP)
                        dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
                                      le32_to_cpu(tdma->src));
-               else if (type == CESA_TDMA_IV)
-                       dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
-                                     le32_to_cpu(tdma->dst));
 
                tdma = tdma->next;
                dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma,
@@ -209,29 +206,37 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
        return new_tdma;
 }
 
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
                          u32 size, u32 flags, gfp_t gfp_flags)
 {
-
-       struct mv_cesa_tdma_desc *tdma;
-       u8 *iv;
-       dma_addr_t dma_handle;
+       struct mv_cesa_tdma_desc *tdma, *op_desc;
 
        tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
        if (IS_ERR(tdma))
                return PTR_ERR(tdma);
 
-       iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
-       if (!iv)
-               return -ENOMEM;
+       /* We re-use an existing op_desc object to retrieve the context
+        * and result instead of allocating a new one.
+        * There is at least one object of this type in a CESA crypto
+        * req, just pick the first one in the chain.
+        */
+       for (op_desc = chain->first; op_desc; op_desc = op_desc->next) {
+               u32 type = op_desc->flags & CESA_TDMA_TYPE_MSK;
+
+               if (type == CESA_TDMA_OP)
+                       break;
+       }
+
+       if (!op_desc)
+               return -EIO;
 
        tdma->byte_cnt = cpu_to_le32(size | BIT(31));
        tdma->src = src;
-       tdma->dst = cpu_to_le32(dma_handle);
-       tdma->data = iv;
+       tdma->dst = op_desc->src;
+       tdma->op = op_desc->op;
 
        flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
-       tdma->flags = flags | CESA_TDMA_IV;
+       tdma->flags = flags | CESA_TDMA_RESULT;
        return 0;
 }
 
index 104e9ce..451fa18 100644 (file)
@@ -1073,7 +1073,7 @@ static int mv_probe(struct platform_device *pdev)
        if (!res)
                return -ENXIO;
 
-       cp = kzalloc(sizeof(*cp), GFP_KERNEL);
+       cp = devm_kzalloc(&pdev->dev, sizeof(*cp), GFP_KERNEL);
        if (!cp)
                return -ENOMEM;
 
@@ -1163,7 +1163,6 @@ err_irq:
 err_thread:
        kthread_stop(cp->queue_th);
 err:
-       kfree(cp);
        cpg = NULL;
        return ret;
 }
@@ -1187,7 +1186,6 @@ static int mv_remove(struct platform_device *pdev)
                clk_put(cp->clk);
        }
 
-       kfree(cp);
        cpg = NULL;
        return 0;
 }
index 42f0f22..036057a 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/scatterlist.h>
 #include <linux/device.h>
 #include <linux/of.h>
-#include <linux/types.h>
 #include <asm/hvcall.h>
 #include <asm/vio.h>
 
index 0c49956..1d9ecd3 100644 (file)
@@ -390,7 +390,7 @@ static void sahara_decode_status(struct sahara_dev *dev, unsigned int status)
        if (status & SAHARA_STATUS_MODE_BATCH)
                dev_dbg(dev->device, "  - Batch Mode.\n");
        else if (status & SAHARA_STATUS_MODE_DEDICATED)
-               dev_dbg(dev->device, "  - Decidated Mode.\n");
+               dev_dbg(dev->device, "  - Dedicated Mode.\n");
        else if (status & SAHARA_STATUS_MODE_DEBUG)
                dev_dbg(dev->device, "  - Debug Mode.\n");
 
index 0418a2f..0bba6a1 100644 (file)
@@ -590,7 +590,7 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
                if (v_lo & TALITOS_CCPSR_LO_MDTE)
                        dev_err(dev, "master data transfer error\n");
                if (v_lo & TALITOS_CCPSR_LO_SGDLZ)
-                       dev_err(dev, is_sec1 ? "pointeur not complete error\n"
+                       dev_err(dev, is_sec1 ? "pointer not complete error\n"
                                             : "s/g data length zero error\n");
                if (v_lo & TALITOS_CCPSR_LO_FPZ)
                        dev_err(dev, is_sec1 ? "parity error\n"
index de6e241..55f7c39 100644 (file)
@@ -10,10 +10,12 @@ endif
 quiet_cmd_perl = PERL $@
       cmd_perl = $(PERL) $(<) $(TARGET) > $(@)
 
-$(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl
-       $(call cmd,perl)
+targets += aesp8-ppc.S ghashp8-ppc.S
+
+$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE
+       $(call if_changed,perl)
   
-$(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl
-       $(call cmd,perl)
+$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE
+       $(call if_changed,perl)
 
-.PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S
+clean-files := aesp8-ppc.S ghashp8-ppc.S
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
new file mode 100644 (file)
index 0000000..e328b52
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ACOMP_H
+#define _CRYPTO_ACOMP_H
+#include <linux/crypto.h>
+
+#define CRYPTO_ACOMP_ALLOC_OUTPUT      0x00000001
+
+/**
+ * struct acomp_req - asynchronous (de)compression request
+ *
+ * @base:      Common attributes for asynchronous crypto requests
+ * @src:       Source Data
+ * @dst:       Destination data
+ * @slen:      Size of the input buffer
+ * @dlen:      Size of the output buffer and number of bytes produced
+ * @flags:     Internal flags
+ * @__ctx:     Start of private context data
+ */
+struct acomp_req {
+       struct crypto_async_request base;
+       struct scatterlist *src;
+       struct scatterlist *dst;
+       unsigned int slen;
+       unsigned int dlen;
+       u32 flags;
+       void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+/**
+ * struct crypto_acomp - user-instantiated objects which encapsulate
+ * algorithms and core processing logic
+ *
+ * @compress:          Function performs a compress operation
+ * @decompress:                Function performs a de-compress operation
+ * @dst_free:          Frees destination buffer if allocated inside the
+ *                     algorithm
+ * @reqsize:           Context size for (de)compression requests
+ * @base:              Common crypto API algorithm data structure
+ */
+struct crypto_acomp {
+       int (*compress)(struct acomp_req *req);
+       int (*decompress)(struct acomp_req *req);
+       void (*dst_free)(struct scatterlist *dst);
+       unsigned int reqsize;
+       struct crypto_tfm base;
+};
+
+/**
+ * struct acomp_alg - asynchronous compression algorithm
+ *
+ * @compress:  Function performs a compress operation
+ * @decompress:        Function performs a de-compress operation
+ * @dst_free:  Frees destination buffer if allocated inside the algorithm
+ * @init:      Initialize the cryptographic transformation object.
+ *             This function is used to initialize the cryptographic
+ *             transformation object. This function is called only once at
+ *             the instantiation time, right after the transformation context
+ *             was allocated. In case the cryptographic hardware has some
+ *             special requirements which need to be handled by software, this
+ *             function shall check for the precise requirement of the
+ *             transformation and put any software fallbacks in place.
+ * @exit:      Deinitialize the cryptographic transformation object. This is a
+ *             counterpart to @init, used to remove various changes set in
+ *             @init.
+ *
+ * @reqsize:   Context size for (de)compression requests
+ * @base:      Common crypto API algorithm data structure
+ */
+struct acomp_alg {
+       int (*compress)(struct acomp_req *req);
+       int (*decompress)(struct acomp_req *req);
+       void (*dst_free)(struct scatterlist *dst);
+       int (*init)(struct crypto_acomp *tfm);
+       void (*exit)(struct crypto_acomp *tfm);
+       unsigned int reqsize;
+       struct crypto_alg base;
+};
+
+/**
+ * DOC: Asynchronous Compression API
+ *
+ * The Asynchronous Compression API is used with the algorithms of type
+ * CRYPTO_ALG_TYPE_ACOMPRESS (listed as type "acomp" in /proc/crypto)
+ */
+
+/**
+ * crypto_alloc_acomp() -- allocate ACOMPRESS tfm handle
+ * @alg_name:  is the cra_name / name or cra_driver_name / driver name of the
+ *             compression algorithm e.g. "deflate"
+ * @type:      specifies the type of the algorithm
+ * @mask:      specifies the mask for the algorithm
+ *
+ * Allocate a handle for a compression algorithm. The returned struct
+ * crypto_acomp is the handle that is required for any subsequent
+ * API invocation for the compression operations.
+ *
+ * Return:     allocated handle in case of success; IS_ERR() is true in case
+ *             of an error, PTR_ERR() returns the error code.
+ */
+struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type,
+                                       u32 mask);
+
+static inline struct crypto_tfm *crypto_acomp_tfm(struct crypto_acomp *tfm)
+{
+       return &tfm->base;
+}
+
+static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
+{
+       return container_of(alg, struct acomp_alg, base);
+}
+
+static inline struct crypto_acomp *__crypto_acomp_tfm(struct crypto_tfm *tfm)
+{
+       return container_of(tfm, struct crypto_acomp, base);
+}
+
+static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm)
+{
+       return __crypto_acomp_alg(crypto_acomp_tfm(tfm)->__crt_alg);
+}
+
+static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm)
+{
+       return tfm->reqsize;
+}
+
+static inline void acomp_request_set_tfm(struct acomp_req *req,
+                                        struct crypto_acomp *tfm)
+{
+       req->base.tfm = crypto_acomp_tfm(tfm);
+}
+
+static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req)
+{
+       return __crypto_acomp_tfm(req->base.tfm);
+}
+
+/**
+ * crypto_free_acomp() -- free ACOMPRESS tfm handle
+ *
+ * @tfm:       ACOMPRESS tfm handle allocated with crypto_alloc_acomp()
+ */
+static inline void crypto_free_acomp(struct crypto_acomp *tfm)
+{
+       crypto_destroy_tfm(tfm, crypto_acomp_tfm(tfm));
+}
+
+static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask)
+{
+       type &= ~CRYPTO_ALG_TYPE_MASK;
+       type |= CRYPTO_ALG_TYPE_ACOMPRESS;
+       mask |= CRYPTO_ALG_TYPE_MASK;
+
+       return crypto_has_alg(alg_name, type, mask);
+}
+
+/**
+ * acomp_request_alloc() -- allocates asynchronous (de)compression request
+ *
+ * @tfm:       ACOMPRESS tfm handle allocated with crypto_alloc_acomp()
+ *
+ * Return:     allocated handle in case of success or NULL in case of an error
+ */
+struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm);
+
+/**
+ * acomp_request_free() -- zeroize and free asynchronous (de)compression
+ *                        request as well as the output buffer if allocated
+ *                        inside the algorithm
+ *
+ * @req:       request to free
+ */
+void acomp_request_free(struct acomp_req *req);
+
+/**
+ * acomp_request_set_callback() -- Sets an asynchronous callback
+ *
+ * Callback will be called when an asynchronous operation on a given
+ * request is finished.
+ *
+ * @req:       request that the callback will be set for
+ * @flgs:      specify for instance if the operation may backlog
+ * @cmpl:      callback which will be called
+ * @data:      private data used by the caller
+ */
+static inline void acomp_request_set_callback(struct acomp_req *req,
+                                             u32 flgs,
+                                             crypto_completion_t cmpl,
+                                             void *data)
+{
+       req->base.complete = cmpl;
+       req->base.data = data;
+       req->base.flags = flgs;
+}
+
+/**
+ * acomp_request_set_params() -- Sets request parameters
+ *
+ * Sets parameters required by an acomp operation
+ *
+ * @req:       asynchronous compress request
+ * @src:       pointer to input buffer scatterlist
+ * @dst:       pointer to output buffer scatterlist. If this is NULL, the
+ *             acomp layer will allocate the output memory
+ * @slen:      size of the input buffer
+ * @dlen:      size of the output buffer. If dst is NULL, this can be used by
+ *             the user to specify the maximum amount of memory to allocate
+ */
+static inline void acomp_request_set_params(struct acomp_req *req,
+                                           struct scatterlist *src,
+                                           struct scatterlist *dst,
+                                           unsigned int slen,
+                                           unsigned int dlen)
+{
+       req->src = src;
+       req->dst = dst;
+       req->slen = slen;
+       req->dlen = dlen;
+
+       if (!req->dst)
+               req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
+}
+
+/**
+ * crypto_acomp_compress() -- Invoke asynchronous compress operation
+ *
+ * Function invokes the asynchronous compress operation
+ *
+ * @req:       asynchronous compress request
+ *
+ * Return:     zero on success; error code in case of error
+ */
+static inline int crypto_acomp_compress(struct acomp_req *req)
+{
+       struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+       return tfm->compress(req);
+}
+
+/**
+ * crypto_acomp_decompress() -- Invoke asynchronous decompress operation
+ *
+ * Function invokes the asynchronous decompress operation
+ *
+ * @req:       asynchronous compress request
+ *
+ * Return:     zero on success; error code in case of error
+ */
+static inline int crypto_acomp_decompress(struct acomp_req *req)
+{
+       struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+       return tfm->decompress(req);
+}
+
+#endif
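
A minimal sketch of how a caller might drive this interface end to end, assuming the "deflate" algorithm name, caller-supplied linear buffers and a completion-based wait; acomp_done(), example_compress() and the buffer names are illustrative only, not part of the API:

#include <crypto/acompress.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

/* Illustrative callback: wake up the synchronous waiter below. */
static void acomp_done(struct crypto_async_request *base, int err)
{
	complete(base->data);
}

static int example_compress(void *src_buf, unsigned int slen,
			    void *dst_buf, unsigned int dlen)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct scatterlist src, dst;
	struct crypto_acomp *tfm;
	struct acomp_req *req;
	int ret;

	tfm = crypto_alloc_acomp("deflate", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = acomp_request_alloc(tfm);
	if (!req) {
		ret = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&src, src_buf, slen);
	sg_init_one(&dst, dst_buf, dlen);
	acomp_request_set_params(req, &src, &dst, slen, dlen);
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   acomp_done, &done);

	ret = crypto_acomp_compress(req);
	if (ret == -EINPROGRESS || ret == -EBUSY) {
		/* a real caller would propagate the status passed to the
		 * callback instead of assuming success here */
		wait_for_completion(&done);
		ret = 0;
	}

	acomp_request_free(req);
out_free_tfm:
	crypto_free_acomp(tfm);
	return ret;
}
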
diff --git a/include/crypto/cbc.h b/include/crypto/cbc.h
new file mode 100644 (file)
index 0000000..f5b8bfc
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * CBC: Cipher Block Chaining mode
+ *
+ * Copyright (c) 2016 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_CBC_H
+#define _CRYPTO_CBC_H
+
+#include <crypto/internal/skcipher.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+static inline int crypto_cbc_encrypt_segment(
+       struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+       void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+       unsigned int bsize = crypto_skcipher_blocksize(tfm);
+       unsigned int nbytes = walk->nbytes;
+       u8 *src = walk->src.virt.addr;
+       u8 *dst = walk->dst.virt.addr;
+       u8 *iv = walk->iv;
+
+       do {
+               crypto_xor(iv, src, bsize);
+               fn(tfm, iv, dst);
+               memcpy(iv, dst, bsize);
+
+               src += bsize;
+               dst += bsize;
+       } while ((nbytes -= bsize) >= bsize);
+
+       return nbytes;
+}
+
+static inline int crypto_cbc_encrypt_inplace(
+       struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+       void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+       unsigned int bsize = crypto_skcipher_blocksize(tfm);
+       unsigned int nbytes = walk->nbytes;
+       u8 *src = walk->src.virt.addr;
+       u8 *iv = walk->iv;
+
+       do {
+               crypto_xor(src, iv, bsize);
+               fn(tfm, src, src);
+               iv = src;
+
+               src += bsize;
+       } while ((nbytes -= bsize) >= bsize);
+
+       memcpy(walk->iv, iv, bsize);
+
+       return nbytes;
+}
+
+static inline int crypto_cbc_encrypt_walk(struct skcipher_request *req,
+                                         void (*fn)(struct crypto_skcipher *,
+                                                    const u8 *, u8 *))
+{
+       struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+       struct skcipher_walk walk;
+       int err;
+
+       err = skcipher_walk_virt(&walk, req, false);
+
+       while (walk.nbytes) {
+               if (walk.src.virt.addr == walk.dst.virt.addr)
+                       err = crypto_cbc_encrypt_inplace(&walk, tfm, fn);
+               else
+                       err = crypto_cbc_encrypt_segment(&walk, tfm, fn);
+               err = skcipher_walk_done(&walk, err);
+       }
+
+       return err;
+}
+
+static inline int crypto_cbc_decrypt_segment(
+       struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+       void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+       unsigned int bsize = crypto_skcipher_blocksize(tfm);
+       unsigned int nbytes = walk->nbytes;
+       u8 *src = walk->src.virt.addr;
+       u8 *dst = walk->dst.virt.addr;
+       u8 *iv = walk->iv;
+
+       do {
+               fn(tfm, src, dst);
+               crypto_xor(dst, iv, bsize);
+               iv = src;
+
+               src += bsize;
+               dst += bsize;
+       } while ((nbytes -= bsize) >= bsize);
+
+       memcpy(walk->iv, iv, bsize);
+
+       return nbytes;
+}
+
+static inline int crypto_cbc_decrypt_inplace(
+       struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+       void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+       unsigned int bsize = crypto_skcipher_blocksize(tfm);
+       unsigned int nbytes = walk->nbytes;
+       u8 *src = walk->src.virt.addr;
+       u8 last_iv[bsize];
+
+       /* Start of the last block. */
+       src += nbytes - (nbytes & (bsize - 1)) - bsize;
+       memcpy(last_iv, src, bsize);
+
+       for (;;) {
+               fn(tfm, src, src);
+               if ((nbytes -= bsize) < bsize)
+                       break;
+               crypto_xor(src, src - bsize, bsize);
+               src -= bsize;
+       }
+
+       crypto_xor(src, walk->iv, bsize);
+       memcpy(walk->iv, last_iv, bsize);
+
+       return nbytes;
+}
+
+static inline int crypto_cbc_decrypt_blocks(
+       struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+       void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+       if (walk->src.virt.addr == walk->dst.virt.addr)
+               return crypto_cbc_decrypt_inplace(walk, tfm, fn);
+       else
+               return crypto_cbc_decrypt_segment(walk, tfm, fn);
+}
+
+#endif /* _CRYPTO_CBC_H */
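
A minimal sketch of how a cipher implementation might plug into these helpers, assuming a driver-private single-block routine; example_aes_encrypt_block() and the context access are placeholders, not part of this header:

#include <crypto/cbc.h>
#include <crypto/internal/skcipher.h>

/* Placeholder for the driver's own single-block AES primitive. */
static void example_encrypt_one(struct crypto_skcipher *tfm,
				const u8 *src, u8 *dst)
{
	example_aes_encrypt_block(crypto_skcipher_ctx(tfm), dst, src);
}

/* The helper walks the request and performs the CBC chaining, calling
 * the per-block function once per block. */
static int example_cbc_encrypt(struct skcipher_request *req)
{
	return crypto_cbc_encrypt_walk(req, example_encrypt_one);
}
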
index bc792d5..94418cb 100644 (file)
 #ifndef _CRYPTO_CRYPT_H
 #define _CRYPTO_CRYPT_H
 
-#include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <crypto/aead.h>
 #include <crypto/hash.h>
+#include <crypto/skcipher.h>
 
 struct cryptd_ablkcipher {
        struct crypto_ablkcipher base;
@@ -34,6 +34,17 @@ struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm);
 bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm);
 void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm);
 
+struct cryptd_skcipher {
+       struct crypto_skcipher base;
+};
+
+struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
+                                             u32 type, u32 mask);
+struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm);
+/* Must be called without moving CPUs. */
+bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm);
+void cryptd_free_skcipher(struct cryptd_skcipher *tfm);
+
 struct cryptd_ahash {
        struct crypto_ahash base;
 };
index 04eb5c7..1bf600f 100644 (file)
@@ -43,8 +43,7 @@
  * @prepare_hash_request: do some prepare if need before handle the current request
  * @unprepare_hash_request: undo any work done by prepare_hash_request()
  * @hash_one_request: do hash for current request
- * @kworker: thread struct for request pump
- * @kworker_task: pointer to task for request pump kworker thread
+ * @kworker: kthread worker struct for request pump
  * @pump_requests: work struct for scheduling work to the request pump
  * @priv_data: the engine private data
  * @cur_req: the current request which is on processing
@@ -78,8 +77,7 @@ struct crypto_engine {
        int (*hash_one_request)(struct crypto_engine *engine,
                                struct ahash_request *req);
 
-       struct kthread_worker           kworker;
-       struct task_struct              *kworker_task;
+       struct kthread_worker           *kworker;
        struct kthread_work             pump_requests;
 
        void                            *priv_data;
index da2530e..592d47e 100644 (file)
@@ -177,24 +177,23 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t);
 
 static inline void gf128mul_free_4k(struct gf128mul_4k *t)
 {
-       kfree(t);
+       kzfree(t);
 }
 
 
-/* 64k table optimization, implemented for lle and bbe */
+/* 64k table optimization, implemented for bbe */
 
 struct gf128mul_64k {
        struct gf128mul_4k *t[16];
 };
 
-/* first initialize with the constant factor with which you
- * want to multiply and then call gf128_64k_lle with the other
- * factor in the first argument, the table in the second and a
- * scratch register in the third. Afterwards *a = *r. */
-struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g);
+/* First initialize with the constant factor with which you
+ * want to multiply and then call gf128mul_64k_bbe with the other
+ * factor in the first argument, and the table in the second.
+ * Afterwards, the result is stored in *a.
+ */
 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g);
 void gf128mul_free_64k(struct gf128mul_64k *t);
-void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t);
 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t);
 
 #endif /* _CRYPTO_GF128MUL_H */
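
To make the bbe flow described above concrete, a minimal sketch (h is the constant factor, x the other operand; both are caller-supplied be128 values in this illustration):

static int example_gf128_mul(be128 *x, const be128 *h)
{
	struct gf128mul_64k *t;

	t = gf128mul_init_64k_bbe(h);	/* build the 64k table for h */
	if (!t)
		return -ENOMEM;

	gf128mul_64k_bbe(x, t);		/* x = x * h, result left in *x */
	gf128mul_free_64k(t);
	return 0;
}
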
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
new file mode 100644 (file)
index 0000000..1de2b5a
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Asynchronous Compression operations
+ *
+ * Copyright (c) 2016, Intel Corporation
+ * Authors: Weigang Li <weigang.li@intel.com>
+ *          Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_ACOMP_INT_H
+#define _CRYPTO_ACOMP_INT_H
+#include <crypto/acompress.h>
+
+/*
+ * Transform internal helpers.
+ */
+static inline void *acomp_request_ctx(struct acomp_req *req)
+{
+       return req->__ctx;
+}
+
+static inline void *acomp_tfm_ctx(struct crypto_acomp *tfm)
+{
+       return tfm->base.__crt_ctx;
+}
+
+static inline void acomp_request_complete(struct acomp_req *req,
+                                         int err)
+{
+       req->base.complete(&req->base, err);
+}
+
+static inline const char *acomp_alg_name(struct crypto_acomp *tfm)
+{
+       return crypto_acomp_tfm(tfm)->__crt_alg->cra_name;
+}
+
+static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm)
+{
+       struct acomp_req *req;
+
+       req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL);
+       if (likely(req))
+               acomp_request_set_tfm(req, tfm);
+       return req;
+}
+
+static inline void __acomp_request_free(struct acomp_req *req)
+{
+       kzfree(req);
+}
+
+/**
+ * crypto_register_acomp() -- Register asynchronous compression algorithm
+ *
+ * Function registers an implementation of an asynchronous
+ * compression algorithm
+ *
+ * @alg:       algorithm definition
+ *
+ * Return:     zero on success; error code in case of error
+ */
+int crypto_register_acomp(struct acomp_alg *alg);
+
+/**
+ * crypto_unregister_acomp() -- Unregister asynchronous compression algorithm
+ *
+ * Function unregisters an implementation of an asynchronous
+ * compression algorithm
+ *
+ * @alg:       algorithm definition
+ *
+ * Return:     zero on success; error code in case of error
+ */
+int crypto_unregister_acomp(struct acomp_alg *alg);
+
+#endif
diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h
new file mode 100644 (file)
index 0000000..3fda3c5
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Synchronous Compression operations
+ *
+ * Copyright 2015 LG Electronics Inc.
+ * Copyright (c) 2016, Intel Corporation
+ * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#ifndef _CRYPTO_SCOMP_INT_H
+#define _CRYPTO_SCOMP_INT_H
+#include <linux/crypto.h>
+
+#define SCOMP_SCRATCH_SIZE     131072
+
+struct crypto_scomp {
+       struct crypto_tfm base;
+};
+
+/**
+ * struct scomp_alg - synchronous compression algorithm
+ *
+ * @alloc_ctx: Function allocates algorithm specific context
+ * @free_ctx:  Function frees context allocated with alloc_ctx
+ * @compress:  Function performs a compress operation
+ * @decompress:        Function performs a de-compress operation
+ * @init:      Initialize the cryptographic transformation object.
+ *             This function is used to initialize the cryptographic
+ *             transformation object. This function is called only once at
+ *             the instantiation time, right after the transformation context
+ *             was allocated. In case the cryptographic hardware has some
+ *             special requirements which need to be handled by software, this
+ *             function shall check for the precise requirement of the
+ *             transformation and put any software fallbacks in place.
+ * @exit:      Deinitialize the cryptographic transformation object. This is a
+ *             counterpart to @init, used to remove various changes set in
+ *             @init.
+ * @base:      Common crypto API algorithm data structure
+ */
+struct scomp_alg {
+       void *(*alloc_ctx)(struct crypto_scomp *tfm);
+       void (*free_ctx)(struct crypto_scomp *tfm, void *ctx);
+       int (*compress)(struct crypto_scomp *tfm, const u8 *src,
+                       unsigned int slen, u8 *dst, unsigned int *dlen,
+                       void *ctx);
+       int (*decompress)(struct crypto_scomp *tfm, const u8 *src,
+                         unsigned int slen, u8 *dst, unsigned int *dlen,
+                         void *ctx);
+       struct crypto_alg base;
+};
+
+static inline struct scomp_alg *__crypto_scomp_alg(struct crypto_alg *alg)
+{
+       return container_of(alg, struct scomp_alg, base);
+}
+
+static inline struct crypto_scomp *__crypto_scomp_tfm(struct crypto_tfm *tfm)
+{
+       return container_of(tfm, struct crypto_scomp, base);
+}
+
+static inline struct crypto_tfm *crypto_scomp_tfm(struct crypto_scomp *tfm)
+{
+       return &tfm->base;
+}
+
+static inline void crypto_free_scomp(struct crypto_scomp *tfm)
+{
+       crypto_destroy_tfm(tfm, crypto_scomp_tfm(tfm));
+}
+
+static inline struct scomp_alg *crypto_scomp_alg(struct crypto_scomp *tfm)
+{
+       return __crypto_scomp_alg(crypto_scomp_tfm(tfm)->__crt_alg);
+}
+
+static inline void *crypto_scomp_alloc_ctx(struct crypto_scomp *tfm)
+{
+       return crypto_scomp_alg(tfm)->alloc_ctx(tfm);
+}
+
+static inline void crypto_scomp_free_ctx(struct crypto_scomp *tfm,
+                                        void *ctx)
+{
+       return crypto_scomp_alg(tfm)->free_ctx(tfm, ctx);
+}
+
+static inline int crypto_scomp_compress(struct crypto_scomp *tfm,
+                                       const u8 *src, unsigned int slen,
+                                       u8 *dst, unsigned int *dlen, void *ctx)
+{
+       return crypto_scomp_alg(tfm)->compress(tfm, src, slen, dst, dlen, ctx);
+}
+
+static inline int crypto_scomp_decompress(struct crypto_scomp *tfm,
+                                         const u8 *src, unsigned int slen,
+                                         u8 *dst, unsigned int *dlen,
+                                         void *ctx)
+{
+       return crypto_scomp_alg(tfm)->decompress(tfm, src, slen, dst, dlen,
+                                                ctx);
+}
+
+int crypto_init_scomp_ops_async(struct crypto_tfm *tfm);
+struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req);
+void crypto_acomp_scomp_free_ctx(struct acomp_req *req);
+
+/**
+ * crypto_register_scomp() -- Register synchronous compression algorithm
+ *
+ * Function registers an implementation of a synchronous
+ * compression algorithm
+ *
+ * @alg:       algorithm definition
+ *
+ * Return: zero on success; error code in case of error
+ */
+int crypto_register_scomp(struct scomp_alg *alg);
+
+/**
+ * crypto_unregister_scomp() -- Unregister synchronous compression algorithm
+ *
+ * Function unregisters an implementation of a synchronous
+ * compression algorithm
+ *
+ * @alg:       algorithm definition
+ *
+ * Return: zero on success; error code in case of error
+ */
+int crypto_unregister_scomp(struct scomp_alg *alg);
+
+#endif
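
A skeleton of what a provider registering against this interface might look like; the "example" names, EXAMPLE_CTX_SIZE and the stubbed (de)compression bodies are assumptions, shown only to illustrate the callback shapes:

#include <crypto/internal/scompress.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/slab.h>

#define EXAMPLE_CTX_SIZE	4096	/* illustrative workspace size */

static void *example_scomp_alloc_ctx(struct crypto_scomp *tfm)
{
	void *ctx = kzalloc(EXAMPLE_CTX_SIZE, GFP_KERNEL);

	return ctx ? ctx : ERR_PTR(-ENOMEM);
}

static void example_scomp_free_ctx(struct crypto_scomp *tfm, void *ctx)
{
	kfree(ctx);
}

static int example_scomp_compress(struct crypto_scomp *tfm, const u8 *src,
				  unsigned int slen, u8 *dst,
				  unsigned int *dlen, void *ctx)
{
	return -EOPNOTSUPP;	/* real code fills dst and updates *dlen */
}

static int example_scomp_decompress(struct crypto_scomp *tfm, const u8 *src,
				    unsigned int slen, u8 *dst,
				    unsigned int *dlen, void *ctx)
{
	return -EOPNOTSUPP;
}

static struct scomp_alg example_scomp = {
	.alloc_ctx	= example_scomp_alloc_ctx,
	.free_ctx	= example_scomp_free_ctx,
	.compress	= example_scomp_compress,
	.decompress	= example_scomp_decompress,
	.base		= {
		.cra_name		= "example",
		.cra_driver_name	= "example-generic",
		.cra_module		= THIS_MODULE,
	},
};

/* module init/exit would pair crypto_register_scomp(&example_scomp)
 * with crypto_unregister_scomp(&example_scomp) */
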
diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h
new file mode 100644 (file)
index 0000000..4295099
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Shared crypto simd helpers
+ */
+
+#ifndef _CRYPTO_INTERNAL_SIMD_H
+#define _CRYPTO_INTERNAL_SIMD_H
+
+struct simd_skcipher_alg;
+
+struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
+                                                     const char *drvname,
+                                                     const char *basename);
+struct simd_skcipher_alg *simd_skcipher_create(const char *algname,
+                                              const char *basename);
+void simd_skcipher_free(struct simd_skcipher_alg *alg);
+
+#endif /* _CRYPTO_INTERNAL_SIMD_H */
index a21a95e..8735979 100644 (file)
 
 #include <crypto/algapi.h>
 #include <crypto/skcipher.h>
+#include <linux/list.h>
 #include <linux/types.h>
 
+struct aead_request;
 struct rtattr;
 
 struct skcipher_instance {
@@ -34,6 +36,40 @@ struct crypto_skcipher_spawn {
        struct crypto_spawn base;
 };
 
+struct skcipher_walk {
+       union {
+               struct {
+                       struct page *page;
+                       unsigned long offset;
+               } phys;
+
+               struct {
+                       u8 *page;
+                       void *addr;
+               } virt;
+       } src, dst;
+
+       struct scatter_walk in;
+       unsigned int nbytes;
+
+       struct scatter_walk out;
+       unsigned int total;
+
+       struct list_head buffers;
+
+       u8 *page;
+       u8 *buffer;
+       u8 *oiv;
+       void *iv;
+
+       unsigned int ivsize;
+
+       int flags;
+       unsigned int blocksize;
+       unsigned int chunksize;
+       unsigned int alignmask;
+};
+
 extern const struct crypto_type crypto_givcipher_type;
 
 static inline struct crypto_instance *skcipher_crypto_instance(
@@ -68,14 +104,6 @@ static inline void crypto_set_skcipher_spawn(
 int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name,
                         u32 type, u32 mask);
 
-static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn,
-                                       const char *name, u32 type, u32 mask)
-{
-       return crypto_grab_skcipher(spawn, name, type, mask);
-}
-
-struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask);
-
 static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn)
 {
        crypto_drop_spawn(&spawn->base);
@@ -99,12 +127,6 @@ static inline struct crypto_skcipher *crypto_spawn_skcipher(
        return crypto_spawn_tfm2(&spawn->base);
 }
 
-static inline struct crypto_skcipher *crypto_spawn_skcipher2(
-       struct crypto_skcipher_spawn *spawn)
-{
-       return crypto_spawn_skcipher(spawn);
-}
-
 static inline void crypto_skcipher_set_reqsize(
        struct crypto_skcipher *skcipher, unsigned int reqsize)
 {
@@ -118,6 +140,21 @@ void crypto_unregister_skciphers(struct skcipher_alg *algs, int count);
 int skcipher_register_instance(struct crypto_template *tmpl,
                               struct skcipher_instance *inst);
 
+int skcipher_walk_done(struct skcipher_walk *walk, int err);
+int skcipher_walk_virt(struct skcipher_walk *walk,
+                      struct skcipher_request *req,
+                      bool atomic);
+void skcipher_walk_atomise(struct skcipher_walk *walk);
+int skcipher_walk_async(struct skcipher_walk *walk,
+                       struct skcipher_request *req);
+int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req,
+                      bool atomic);
+int skcipher_walk_aead_encrypt(struct skcipher_walk *walk,
+                              struct aead_request *req, bool atomic);
+int skcipher_walk_aead_decrypt(struct skcipher_walk *walk,
+                              struct aead_request *req, bool atomic);
+void skcipher_walk_complete(struct skcipher_walk *walk, int err);
+
 static inline void ablkcipher_request_complete(struct ablkcipher_request *req,
                                               int err)
 {
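
A minimal sketch of the walk loop this interface enables (the same pattern the cbc.h helpers earlier in this series use); process_block() is a hypothetical per-block routine and error handling is reduced to the essentials:

static int example_walk_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	unsigned int bsize = crypto_skcipher_blocksize(tfm);
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes) {
		unsigned int nbytes = walk.nbytes;
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		/* consume whole blocks, hand any tail back to the walker */
		while (nbytes >= bsize) {
			process_block(tfm, src, dst);
			src += bsize;
			dst += bsize;
			nbytes -= bsize;
		}

		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
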
index ede6b97..77b6306 100644 (file)
@@ -2,8 +2,7 @@
 #define _CRYPTO_XTS_H
 
 #include <crypto/b128ops.h>
-#include <linux/crypto.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/fips.h>
 
 struct scatterlist;
@@ -51,4 +50,27 @@ static inline int xts_check_key(struct crypto_tfm *tfm,
        return 0;
 }
 
+static inline int xts_verify_key(struct crypto_skcipher *tfm,
+                                const u8 *key, unsigned int keylen)
+{
+       /*
+        * key consists of keys of equal size concatenated, therefore
+        * the length must be even.
+        */
+       if (keylen % 2) {
+               crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       /* ensure that the AES and tweak key are not identical */
+       if ((fips_enabled || crypto_skcipher_get_flags(tfm) &
+                            CRYPTO_TFM_REQ_WEAK_KEY) &&
+           !crypto_memneq(key, key + (keylen / 2), keylen / 2)) {
+               crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 #endif  /* _CRYPTO_XTS_H */
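
A short sketch of the intended use in a setkey handler; example_set_halves() stands in for whatever driver-specific key programming follows the check:

static int example_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			      unsigned int keylen)
{
	int err;

	err = xts_verify_key(tfm, key, keylen);
	if (err)
		return err;

	/* first half is the data key, second half the tweak key */
	return example_set_halves(tfm, key, key + keylen / 2, keylen / 2);
}
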
index a765333..c71dd8f 100644 (file)
@@ -11,8 +11,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef __CPP_H__
-#define __CPP_H__
+#ifndef __CCP_H__
+#define __CCP_H__
 
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
@@ -553,7 +553,7 @@ enum ccp_engine {
 #define CCP_CMD_PASSTHRU_NO_DMA_MAP    0x00000002
 
 /**
- * struct ccp_cmd - CPP operation request
+ * struct ccp_cmd - CCP operation request
  * @entry: list element (ccp driver use only)
  * @work: work element used for callbacks (ccp driver use only)
  * @ccp: CCP device to be run on (ccp driver use only)
index 7cee555..167aea2 100644 (file)
@@ -50,6 +50,8 @@
 #define CRYPTO_ALG_TYPE_SKCIPHER       0x00000005
 #define CRYPTO_ALG_TYPE_GIVCIPHER      0x00000006
 #define CRYPTO_ALG_TYPE_KPP            0x00000008
+#define CRYPTO_ALG_TYPE_ACOMPRESS      0x0000000a
+#define CRYPTO_ALG_TYPE_SCOMPRESS      0x0000000b
 #define CRYPTO_ALG_TYPE_RNG            0x0000000c
 #define CRYPTO_ALG_TYPE_AKCIPHER       0x0000000d
 #define CRYPTO_ALG_TYPE_DIGEST         0x0000000e
@@ -60,6 +62,7 @@
 #define CRYPTO_ALG_TYPE_HASH_MASK      0x0000000e
 #define CRYPTO_ALG_TYPE_AHASH_MASK     0x0000000e
 #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c
+#define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e
 
 #define CRYPTO_ALG_LARVAL              0x00000010
 #define CRYPTO_ALG_DEAD                        0x00000020
@@ -87,7 +90,7 @@
 #define CRYPTO_ALG_TESTED              0x00000400
 
 /*
- * Set if the algorithm is an instance that is build from templates.
+ * Set if the algorithm is an instance that is built from templates.
  */
 #define CRYPTO_ALG_INSTANCE            0x00000800
 
index 34a0dc1..bee0827 100644 (file)
@@ -30,8 +30,7 @@
  *                     Must not be NULL.    *OBSOLETE*
  * @read:              New API. drivers can fill up to max bytes of data
  *                     into the buffer. The buffer is aligned for any type
- *                     and max is guaranteed to be >= to that alignment
- *                     (either 4 or 8 depending on architecture).
+ *                     and max is a multiple of 4 and >= 32 bytes.
  * @priv:              Private data, for use by the RNG driver.
  * @quality:           Estimation of true entropy in RNG's bitstream
  *                     (per mill).
index 79b5ded..11d21fc 100644 (file)
@@ -46,6 +46,7 @@ enum crypto_attr_type_t {
        CRYPTOCFGA_REPORT_CIPHER,       /* struct crypto_report_cipher */
        CRYPTOCFGA_REPORT_AKCIPHER,     /* struct crypto_report_akcipher */
        CRYPTOCFGA_REPORT_KPP,          /* struct crypto_report_kpp */
+       CRYPTOCFGA_REPORT_ACOMP,        /* struct crypto_report_acomp */
        __CRYPTOCFGA_MAX
 
 #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
@@ -112,5 +113,9 @@ struct crypto_report_kpp {
        char type[CRYPTO_MAX_NAME];
 };
 
+struct crypto_report_acomp {
+       char type[CRYPTO_MAX_NAME];
+};
+
 #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
                               sizeof(struct crypto_report_blkcipher))
index 7848f05..05316c9 100644 (file)
@@ -64,15 +64,11 @@ static int padata_cpu_hash(struct parallel_data *pd)
 static void padata_parallel_worker(struct work_struct *parallel_work)
 {
        struct padata_parallel_queue *pqueue;
-       struct parallel_data *pd;
-       struct padata_instance *pinst;
        LIST_HEAD(local_list);
 
        local_bh_disable();
        pqueue = container_of(parallel_work,
                              struct padata_parallel_queue, work);
-       pd = pqueue->pd;
-       pinst = pd->pinst;
 
        spin_lock(&pqueue->parallel.lock);
        list_replace_init(&pqueue->parallel.list, &local_list);