armv8 SHA-1 using ARMv8 Crypto Extensions:
authorLoic Poulain <loic.poulain@linaro.org>
Wed, 1 Jun 2022 18:26:29 +0000 (20:26 +0200)
committerTom Rini <trini@konsulko.com>
Mon, 27 Jun 2022 17:36:28 +0000 (13:36 -0400)
This patch adds support for the SHA-1 Secure Hash Algorithm for CPUs
that have support for the SHA-1 part of the ARM v8 Crypto Extensions.

It greatly improves sha-1 based operations, about 10x faster on iMX8M
evk board. ~12ms vs ~165ms for a 20MiB kernel sha-1 verification.

asm implementation is a simplified version of the Linux version (from
Ard Biesheuvel).

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
arch/arm/cpu/armv8/Kconfig
arch/arm/cpu/armv8/Makefile
arch/arm/cpu/armv8/sha1_ce_core.S [new file with mode: 0644]
arch/arm/cpu/armv8/sha1_ce_glue.c [new file with mode: 0644]

index 09f3f50..1768e67 100644 (file)
@@ -185,4 +185,15 @@ config ARMV8_EA_EL3_FIRST
          Exception handling at all exception levels for External Abort and
          SError interrupt exception are taken in EL3.
 
+menuconfig ARMV8_CRYPTO
+       bool "ARM64 Accelerated Cryptographic Algorithms"
+
+if ARMV8_CRYPTO
+
+config ARMV8_CE_SHA1
+       bool "SHA-1 digest algorithm (ARMv8 Crypto Extensions)"
+       default y if SHA1
+
+endif
+
 endif
index 85fe047..ff2495c 100644 (file)
@@ -44,3 +44,4 @@ obj-$(CONFIG_TARGET_HIKEY) += hisilicon/
 obj-$(CONFIG_ARMV8_PSCI) += psci.o
 obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
 obj-$(CONFIG_XEN) += xen/
+obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
diff --git a/arch/arm/cpu/armv8/sha1_ce_core.S b/arch/arm/cpu/armv8/sha1_ce_core.S
new file mode 100644 (file)
index 0000000..fbf2714
--- /dev/null
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * sha1_ce_core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2022 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+#include <asm/system.h>
+#include <asm/macro.h>
+
+       .text
+       .arch           armv8-a+crypto
+
+       k0              .req    v0
+       k1              .req    v1
+       k2              .req    v2
+       k3              .req    v3
+
+       t0              .req    v4
+       t1              .req    v5
+
+       dga             .req    q6
+       dgav            .req    v6
+       dgb             .req    s7
+       dgbv            .req    v7
+
+       dg0q            .req    q12
+       dg0s            .req    s12
+       dg0v            .req    v12
+       dg1s            .req    s13
+       dg1v            .req    v13
+       dg2s            .req    s14
+
+       .macro          add_only, op, ev, rc, s0, dg1
+       .ifc            \ev, ev
+       add             t1.4s, v\s0\().4s, \rc\().4s
+       sha1h           dg2s, dg0s
+       .ifnb           \dg1
+       sha1\op         dg0q, \dg1, t0.4s
+       .else
+       sha1\op         dg0q, dg1s, t0.4s
+       .endif
+       .else
+       .ifnb           \s0
+       add             t0.4s, v\s0\().4s, \rc\().4s
+       .endif
+       sha1h           dg1s, dg0s
+       sha1\op         dg0q, dg2s, t1.4s
+       .endif
+       .endm
+
+       .macro          add_update, op, ev, rc, s0, s1, s2, s3, dg1
+       sha1su0         v\s0\().4s, v\s1\().4s, v\s2\().4s
+       add_only        \op, \ev, \rc, \s1, \dg1
+       sha1su1         v\s0\().4s, v\s3\().4s
+       .endm
+
+       .macro          loadrc, k, val, tmp
+       movz            \tmp, :abs_g0_nc:\val
+       movk            \tmp, :abs_g1:\val
+       dup             \k, \tmp
+       .endm
+
+       /*
+        * void sha1_armv8_ce_process(uint32_t state[5], uint8_t const *src,
+        *                            uint32_t blocks)
+        */
+ENTRY(sha1_armv8_ce_process)
+       /* load round constants */
+       loadrc          k0.4s, 0x5a827999, w6
+       loadrc          k1.4s, 0x6ed9eba1, w6
+       loadrc          k2.4s, 0x8f1bbcdc, w6
+       loadrc          k3.4s, 0xca62c1d6, w6
+
+       /* load state (4+1 digest states) */
+       ld1             {dgav.4s}, [x0]
+       ldr             dgb, [x0, #16]
+
+       /* load input (64 bytes into v8->v11 16B vectors) */
+0:     ld1             {v8.4s-v11.4s}, [x1], #64
+       sub             w2, w2, #1
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       rev32           v8.16b, v8.16b
+       rev32           v9.16b, v9.16b
+       rev32           v10.16b, v10.16b
+       rev32           v11.16b, v11.16b
+#endif
+
+1:     add             t0.4s, v8.4s, k0.4s
+       mov             dg0v.16b, dgav.16b
+
+       add_update      c, ev, k0,  8,  9, 10, 11, dgb
+       add_update      c, od, k0,  9, 10, 11,  8
+       add_update      c, ev, k0, 10, 11,  8,  9
+       add_update      c, od, k0, 11,  8,  9, 10
+       add_update      c, ev, k1,  8,  9, 10, 11
+
+       add_update      p, od, k1,  9, 10, 11,  8
+       add_update      p, ev, k1, 10, 11,  8,  9
+       add_update      p, od, k1, 11,  8,  9, 10
+       add_update      p, ev, k1,  8,  9, 10, 11
+       add_update      p, od, k2,  9, 10, 11,  8
+
+       add_update      m, ev, k2, 10, 11,  8,  9
+       add_update      m, od, k2, 11,  8,  9, 10
+       add_update      m, ev, k2,  8,  9, 10, 11
+       add_update      m, od, k2,  9, 10, 11,  8
+       add_update      m, ev, k3, 10, 11,  8,  9
+
+       add_update      p, od, k3, 11,  8,  9, 10
+       add_only        p, ev, k3,  9
+       add_only        p, od, k3, 10
+       add_only        p, ev, k3, 11
+       add_only        p, od
+
+       /* update state */
+       add             dgbv.2s, dgbv.2s, dg1v.2s
+       add             dgav.4s, dgav.4s, dg0v.4s
+
+       /* loop on next block? */
+       cbz             w2, 2f
+       b               0b
+
+       /* store new state */
+2:     st1             {dgav.4s}, [x0]
+       str             dgb, [x0, #16]
+       mov             w0, w2
+       ret
+ENDPROC(sha1_armv8_ce_process)
diff --git a/arch/arm/cpu/armv8/sha1_ce_glue.c b/arch/arm/cpu/armv8/sha1_ce_glue.c
new file mode 100644 (file)
index 0000000..780b119
--- /dev/null
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sha1_ce_glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2022 Linaro Ltd <loic.poulain@linaro.org>
+ */
+
+#include <common.h>
+#include <u-boot/sha1.h>
+
+extern void sha1_armv8_ce_process(uint32_t state[5], uint8_t const *src,
+                                 uint32_t blocks);
+
+void sha1_process(sha1_context *ctx, const unsigned char *data,
+                 unsigned int blocks)
+{
+       if (!blocks)
+               return;
+
+       sha1_armv8_ce_process(ctx->state, data, blocks);
+}