crypto: serpent-sse2 - prepare serpent-sse2 glue code into generic x86 glue code...
authorJussi Kivilinna <jussi.kivilinna@mbnet.fi>
Mon, 18 Jun 2012 11:07:14 +0000 (14:07 +0300)
committerHerbert Xu <herbert@gondor.apana.org.au>
Wed, 27 Jun 2012 06:42:01 +0000 (14:42 +0800)
Block cipher implementations in arch/x86/crypto/ contain common glue code that
is currently duplicated in each module (camellia-x86_64, twofish-x86_64-3way,
twofish-avx, serpent-sse2 and serpent-avx). This patch prepares serpent-sse2
glue into generic glue code for all 128bit block ciphers to use in
arch/x86/crypto.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/serpent_sse2_glue.c

index 805c91f..8c86239 100644 (file)
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
+typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
+typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
+                                      u128 *iv);
+
+#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
+#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
+#define GLUE_CTR_FUNC_CAST(fn) ((common_glue_ctr_func_t)(fn))
+
+struct common_glue_func_entry {
+       unsigned int num_blocks; /* number of blocks that @fn will process */
+       union {
+               common_glue_func_t ecb;
+               common_glue_cbc_func_t cbc;
+               common_glue_ctr_func_t ctr;
+       } fn_u;
+};
+
+struct common_glue_ctx {
+       unsigned int num_funcs;
+       int fpu_blocks_limit; /* -1 means fpu not needed at all */
+
+       /*
+        * First funcs entry must have largest num_blocks and last funcs entry
+        * must have num_blocks == 1!
+        */
+       struct common_glue_func_entry funcs[];
+};
+
+static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
+                                 struct blkcipher_desc *desc,
+                                 bool fpu_enabled, unsigned int nbytes)
 {
+       if (likely(fpu_blocks_limit < 0))
+               return false;
+
        if (fpu_enabled)
                return true;
 
-       /* SSE2 is only used when chunk to be processed is large enough, so
-        * do not enable FPU until it is necessary.
+       /*
+        * Vector-registers are only used when chunk to be processed is large
+        * enough, so do not enable FPU until it is necessary.
         */
-       if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
+       if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
                return false;
 
+       if (desc) {
+               /* prevent sleeping if FPU is in use */
+               desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       }
+
        kernel_fpu_begin();
        return true;
 }
 
-static inline void serpent_fpu_end(bool fpu_enabled)
+static inline void glue_fpu_end(bool fpu_enabled)
 {
        if (fpu_enabled)
                kernel_fpu_end();
 }
 
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
-                    bool enc)
+static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+                                  struct blkcipher_desc *desc,
+                                  struct blkcipher_walk *walk)
 {
+       void *ctx = crypto_blkcipher_ctx(desc->tfm);
+       const unsigned int bsize = 128 / 8;
+       unsigned int nbytes, i, func_bytes;
        bool fpu_enabled = false;
-       struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       const unsigned int bsize = SERPENT_BLOCK_SIZE;
-       unsigned int nbytes;
        int err;
 
        err = blkcipher_walk_virt(desc, walk);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
        while ((nbytes = walk->nbytes)) {
                u8 *wsrc = walk->src.virt.addr;
                u8 *wdst = walk->dst.virt.addr;
 
-               fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
+               fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+                                            desc, fpu_enabled, nbytes);
 
-               /* Process multi-block batch */
-               if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
-                       do {
-                               if (enc)
-                                       serpent_enc_blk_xway(ctx, wdst, wsrc);
-                               else
-                                       serpent_dec_blk_xway(ctx, wdst, wsrc);
+               for (i = 0; i < gctx->num_funcs; i++) {
+                       func_bytes = bsize * gctx->funcs[i].num_blocks;
 
-                               wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
-                               wdst += bsize * SERPENT_PARALLEL_BLOCKS;
-                               nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-                       } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
+                       /* Process multi-block batch */
+                       if (nbytes >= func_bytes) {
+                               do {
+                                       gctx->funcs[i].fn_u.ecb(ctx, wdst,
+                                                               wsrc);
 
-                       if (nbytes < bsize)
-                               goto done;
-               }
-
-               /* Handle leftovers */
-               do {
-                       if (enc)
-                               __serpent_encrypt(ctx, wdst, wsrc);
-                       else
-                               __serpent_decrypt(ctx, wdst, wsrc);
+                                       wsrc += func_bytes;
+                                       wdst += func_bytes;
+                                       nbytes -= func_bytes;
+                               } while (nbytes >= func_bytes);
 
-                       wsrc += bsize;
-                       wdst += bsize;
-                       nbytes -= bsize;
-               } while (nbytes >= bsize);
+                               if (nbytes < bsize)
+                                       goto done;
+                       }
+               }
 
 done:
                err = blkcipher_walk_done(desc, walk, nbytes);
        }
 
-       serpent_fpu_end(fpu_enabled);
+       glue_fpu_end(fpu_enabled);
        return err;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
+                         struct blkcipher_desc *desc, struct scatterlist *dst,
+                         struct scatterlist *src, unsigned int nbytes)
 {
        struct blkcipher_walk walk;
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
-       return ecb_crypt(desc, &walk, true);
+       return __glue_ecb_crypt_128bit(gctx, desc, &walk);
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+                                             struct blkcipher_desc *desc,
+                                             struct blkcipher_walk *walk)
 {
-       struct blkcipher_walk walk;
-
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       return ecb_crypt(desc, &walk, false);
-}
-
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
-                                 struct blkcipher_walk *walk)
-{
-       struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       const unsigned int bsize = SERPENT_BLOCK_SIZE;
+       void *ctx = crypto_blkcipher_ctx(desc->tfm);
+       const unsigned int bsize = 128 / 8;
        unsigned int nbytes = walk->nbytes;
        u128 *src = (u128 *)walk->src.virt.addr;
        u128 *dst = (u128 *)walk->dst.virt.addr;
@@ -154,7 +178,7 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 
        do {
                u128_xor(dst, src, iv);
-               __serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+               fn(ctx, (u8 *)dst, (u8 *)dst);
                iv = dst;
 
                src += 1;
@@ -166,8 +190,10 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
        return nbytes;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
+                           struct blkcipher_desc *desc,
+                           struct scatterlist *dst,
+                           struct scatterlist *src, unsigned int nbytes)
 {
        struct blkcipher_walk walk;
        int err;
@@ -176,24 +202,26 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
        err = blkcipher_walk_virt(desc, &walk);
 
        while ((nbytes = walk.nbytes)) {
-               nbytes = __cbc_encrypt(desc, &walk);
+               nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
 
        return err;
 }
 
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
-                                 struct blkcipher_walk *walk)
+static unsigned int
+__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+                         struct blkcipher_desc *desc,
+                         struct blkcipher_walk *walk)
 {
-       struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       const unsigned int bsize = SERPENT_BLOCK_SIZE;
+       void *ctx = crypto_blkcipher_ctx(desc->tfm);
+       const unsigned int bsize = 128 / 8;
        unsigned int nbytes = walk->nbytes;
        u128 *src = (u128 *)walk->src.virt.addr;
        u128 *dst = (u128 *)walk->dst.virt.addr;
-       u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
        u128 last_iv;
-       int i;
+       unsigned int num_blocks, func_bytes;
+       unsigned int i;
 
        /* Start of the last block. */
        src += nbytes / bsize - 1;
@@ -201,45 +229,31 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
 
        last_iv = *src;
 
-       /* Process multi-block batch */
-       if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
-               do {
-                       nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
-                       src -= SERPENT_PARALLEL_BLOCKS - 1;
-                       dst -= SERPENT_PARALLEL_BLOCKS - 1;
+       for (i = 0; i < gctx->num_funcs; i++) {
+               num_blocks = gctx->funcs[i].num_blocks;
+               func_bytes = bsize * num_blocks;
 
-                       for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
-                               ivs[i] = src[i];
+               /* Process multi-block batch */
+               if (nbytes >= func_bytes) {
+                       do {
+                               nbytes -= func_bytes - bsize;
+                               src -= num_blocks - 1;
+                               dst -= num_blocks - 1;
+
+                               gctx->funcs[i].fn_u.cbc(ctx, dst, src);
 
-                       serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+                               nbytes -= bsize;
+                               if (nbytes < bsize)
+                                       goto done;
 
-                       for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
-                               u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);
+                               u128_xor(dst, dst, src - 1);
+                               src -= 1;
+                               dst -= 1;
+                       } while (nbytes >= func_bytes);
 
-                       nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;
-
-                       u128_xor(dst, dst, src - 1);
-                       src -= 1;
-                       dst -= 1;
-               } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
-
-               if (nbytes < bsize)
-                       goto done;
-       }
-
-       /* Handle leftovers */
-       for (;;) {
-               __serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);
-
-               nbytes -= bsize;
-               if (nbytes < bsize)
-                       break;
-
-               u128_xor(dst, dst, src - 1);
-               src -= 1;
-               dst -= 1;
+               }
        }
 
 done:
@@ -249,24 +263,27 @@ done:
        return nbytes;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                      struct scatterlist *src, unsigned int nbytes)
+int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
+                           struct blkcipher_desc *desc,
+                           struct scatterlist *dst,
+                           struct scatterlist *src, unsigned int nbytes)
 {
+       const unsigned int bsize = 128 / 8;
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
        while ((nbytes = walk.nbytes)) {
-               fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-               nbytes = __cbc_decrypt(desc, &walk);
+               fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+                                            desc, fpu_enabled, nbytes);
+               nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
 
-       serpent_fpu_end(fpu_enabled);
+       glue_fpu_end(fpu_enabled);
        return err;
 }
 
@@ -289,109 +306,232 @@ static inline void u128_inc(u128 *i)
                i->a++;
 }
 
-static void ctr_crypt_final(struct blkcipher_desc *desc,
-                           struct blkcipher_walk *walk)
+static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
+                                       struct blkcipher_desc *desc,
+                                       struct blkcipher_walk *walk)
 {
-       struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       u8 *ctrblk = walk->iv;
-       u8 keystream[SERPENT_BLOCK_SIZE];
-       u8 *src = walk->src.virt.addr;
-       u8 *dst = walk->dst.virt.addr;
+       void *ctx = crypto_blkcipher_ctx(desc->tfm);
+       u8 *src = (u8 *)walk->src.virt.addr;
+       u8 *dst = (u8 *)walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;
+       u128 ctrblk;
+       u128 tmp;
 
-       __serpent_encrypt(ctx, keystream, ctrblk);
-       crypto_xor(keystream, src, nbytes);
-       memcpy(dst, keystream, nbytes);
+       be128_to_u128(&ctrblk, (be128 *)walk->iv);
 
-       crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
+       memcpy(&tmp, src, nbytes);
+       fn_ctr(ctx, &tmp, &tmp, &ctrblk);
+       memcpy(dst, &tmp, nbytes);
+
+       u128_to_be128((be128 *)walk->iv, &ctrblk);
 }
 
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
-                               struct blkcipher_walk *walk)
+static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+                                           struct blkcipher_desc *desc,
+                                           struct blkcipher_walk *walk)
 {
-       struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       const unsigned int bsize = SERPENT_BLOCK_SIZE;
+       const unsigned int bsize = 128 / 8;
+       void *ctx = crypto_blkcipher_ctx(desc->tfm);
        unsigned int nbytes = walk->nbytes;
        u128 *src = (u128 *)walk->src.virt.addr;
        u128 *dst = (u128 *)walk->dst.virt.addr;
        u128 ctrblk;
-       be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
-       int i;
+       unsigned int num_blocks, func_bytes;
+       unsigned int i;
 
        be128_to_u128(&ctrblk, (be128 *)walk->iv);
 
        /* Process multi-block batch */
-       if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
-               do {
-                       /* create ctrblks for parallel encrypt */
-                       for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
-                               if (dst != src)
-                                       dst[i] = src[i];
-
-                               u128_to_be128(&ctrblocks[i], &ctrblk);
-                               u128_inc(&ctrblk);
-                       }
+       for (i = 0; i < gctx->num_funcs; i++) {
+               num_blocks = gctx->funcs[i].num_blocks;
+               func_bytes = bsize * num_blocks;
 
-                       serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
-                                                (u8 *)ctrblocks);
+               if (nbytes >= func_bytes) {
+                       do {
+                               gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
 
-                       src += SERPENT_PARALLEL_BLOCKS;
-                       dst += SERPENT_PARALLEL_BLOCKS;
-                       nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
-               } while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
+                               src += num_blocks;
+                               dst += num_blocks;
+                               nbytes -= func_bytes;
+                       } while (nbytes >= func_bytes);
 
-               if (nbytes < bsize)
-                       goto done;
+                       if (nbytes < bsize)
+                               goto done;
+               }
        }
 
-       /* Handle leftovers */
-       do {
-               if (dst != src)
-                       *dst = *src;
-
-               u128_to_be128(&ctrblocks[0], &ctrblk);
-               u128_inc(&ctrblk);
-
-               __serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
-               u128_xor(dst, dst, (u128 *)ctrblocks);
-
-               src += 1;
-               dst += 1;
-               nbytes -= bsize;
-       } while (nbytes >= bsize);
-
 done:
        u128_to_be128((be128 *)walk->iv, &ctrblk);
        return nbytes;
 }
 
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-                    struct scatterlist *src, unsigned int nbytes)
+int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
+                         struct blkcipher_desc *desc, struct scatterlist *dst,
+                         struct scatterlist *src, unsigned int nbytes)
 {
+       const unsigned int bsize = 128 / 8;
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;
 
        blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+       err = blkcipher_walk_virt_block(desc, &walk, bsize);
 
-       while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
-               fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
-               nbytes = __ctr_crypt(desc, &walk);
+       while ((nbytes = walk.nbytes) >= bsize) {
+               fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+                                            desc, fpu_enabled, nbytes);
+               nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }
 
-       serpent_fpu_end(fpu_enabled);
+       glue_fpu_end(fpu_enabled);
 
        if (walk.nbytes) {
-               ctr_crypt_final(desc, &walk);
+               glue_ctr_crypt_final_128bit(
+                       gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
                err = blkcipher_walk_done(desc, &walk, 0);
        }
 
        return err;
 }
 
+static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+       u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
+       unsigned int j;
+
+       for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+               ivs[j] = src[j];
+
+       serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+       for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
+               u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+       be128 ctrblk;
+
+       u128_to_be128(&ctrblk, iv);
+       u128_inc(iv);
+
+       __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+       u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
+static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+                                  u128 *iv)
+{
+       be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
+       unsigned int i;
+
+       for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
+               if (dst != src)
+                       dst[i] = src[i];
+
+               u128_to_be128(&ctrblks[i], iv);
+               u128_inc(iv);
+       }
+
+       serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx serpent_enc = {
+       .num_funcs = 2,
+       .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+       }, {
+               .num_blocks = 1,
+               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
+       } }
+};
+
+static const struct common_glue_ctx serpent_ctr = {
+       .num_funcs = 2,
+       .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+       }, {
+               .num_blocks = 1,
+               .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
+       } }
+};
+
+static const struct common_glue_ctx serpent_dec = {
+       .num_funcs = 2,
+       .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+               .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+       }, {
+               .num_blocks = 1,
+               .fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
+       } }
+};
+
+static const struct common_glue_ctx serpent_dec_cbc = {
+       .num_funcs = 2,
+       .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+
+       .funcs = { {
+               .num_blocks = SERPENT_PARALLEL_BLOCKS,
+               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+       }, {
+               .num_blocks = 1,
+               .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
+       } }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
+                                    dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                      struct scatterlist *src, unsigned int nbytes)
+{
+       return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
+                                      nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                    struct scatterlist *src, unsigned int nbytes)
+{
+       return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+       return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
+                             NULL, fpu_enabled, nbytes);
+}
+
+static inline void serpent_fpu_end(bool fpu_enabled)
+{
+       glue_fpu_end(fpu_enabled);
+}
+
 struct crypt_priv {
        struct serpent_ctx *ctx;
        bool fpu_enabled;