vmovdqu x3, (3*4*4)(out);
.align 8
-.global __serpent_enc_blk_8way
-.type __serpent_enc_blk_8way,@function;
+.global __serpent_enc_blk_8way_avx
+.type __serpent_enc_blk_8way_avx,@function;
-__serpent_enc_blk_8way:
+__serpent_enc_blk_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
ret;
.align 8
-.global serpent_dec_blk_8way
-.type serpent_dec_blk_8way,@function;
+.global serpent_dec_blk_8way_avx
+.type serpent_dec_blk_8way_avx,@function;
-serpent_dec_blk_8way:
+serpent_dec_blk_8way_avx:
/* input:
* %rdi: ctx, CTX
* %rsi: dst
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
-#include <asm/serpent.h>
+#include <asm/serpent-avx.h>
#include <crypto/scatterwalk.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/i387.h>
-#include <asm/serpent.h>
+#include <asm/serpent-sse2.h>
#include <crypto/scatterwalk.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
--- /dev/null
+#ifndef ASM_X86_SERPENT_AVX_H
+#define ASM_X86_SERPENT_AVX_H
+
+#include <linux/crypto.h>
+#include <crypto/serpent.h>
+
+/* Number of blocks the 8-way AVX assembler routines process per call. */
+#define SERPENT_PARALLEL_BLOCKS 8
+
+/*
+ * 8-way parallel Serpent primitives implemented in AVX assembler (see the
+ * accompanying .S hunks in this patch).  'xor' selects whether the encrypted
+ * result is stored to dst or XORed into it — NOTE(review): presumably used
+ * for CBC/CTR-style chaining; confirm against the glue-code callers.
+ */
+asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+/* Encrypt SERPENT_PARALLEL_BLOCKS blocks from src into dst (plain store). */
+static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __serpent_enc_blk_8way_avx(ctx, dst, src, false);
+}
+
+/* Encrypt SERPENT_PARALLEL_BLOCKS blocks and XOR the result into dst. */
+static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __serpent_enc_blk_8way_avx(ctx, dst, src, true);
+}
+
+/* Decrypt SERPENT_PARALLEL_BLOCKS blocks from src into dst. */
+static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ serpent_dec_blk_8way_avx(ctx, dst, src);
+}
+
+#endif /* ASM_X86_SERPENT_AVX_H */
-#ifndef ASM_X86_SERPENT_H
-#define ASM_X86_SERPENT_H
+#ifndef ASM_X86_SERPENT_SSE2_H
+#define ASM_X86_SERPENT_SSE2_H
#include <linux/crypto.h>
#include <crypto/serpent.h>