1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
5 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
8 #include <linux/linkage.h>
9 #include <linux/cfi_types.h>
10 #include <asm/assembler.h>
/*
 * Repeat the following statements once for each value 0..12 of b.
 * NOTE(review): the loop body and its closing .endr are not visible in this
 * view. Presumably it defines the .Lv<N>.4s symbols that the .inst macros in
 * this file OR into their encodings - confirm against the complete file.
 */
12 .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
/*
 * sm3partw1 rd, rn, rm
 *
 * Emits one raw 32-bit instruction word with .inst rather than an assembler
 * mnemonic: base opcode 0xce60c000, with the rd symbol OR'ed into the low
 * bits, rn shifted left by 5 and rm shifted left by 16.
 * Each operand is pasted after ".L", so an operand written as v4.4s selects
 * the symbol .Lv4.4s. Those symbols are expected to hold the register number;
 * their definition is not visible here - TODO confirm.
 */
16 .macro sm3partw1, rd, rn, rm
17 .inst 0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
/*
 * sm3partw2 rd, rn, rm
 *
 * Emits one raw 32-bit instruction word with .inst: base opcode 0xce60c400,
 * rd symbol in the low bits, rn shifted left by 5, rm shifted left by 16.
 * Operands are resolved through .L-prefixed symbols (for example .Lv4.4s)
 * that are expected to hold the register number - definition not visible
 * here, TODO confirm.
 */
20 .macro sm3partw2, rd, rn, rm
21 .inst 0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
/*
 * sm3ss1 rd, rn, rm, ra
 *
 * Emits one raw 32-bit instruction word with .inst: base opcode 0xce400000,
 * rd symbol in the low bits, rn shifted left by 5, ra shifted left by 10 and
 * rm shifted left by 16. Note the macro argument order (rd, rn, rm, ra)
 * differs from the order of the fields in the encoding.
 * Operands are resolved through .L-prefixed symbols that are expected to hold
 * the register number - definition not visible here, TODO confirm.
 */
24 .macro sm3ss1, rd, rn, rm, ra
25 .inst 0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
/*
 * sm3tt1a rd, rn, rm, imm2
 *
 * Emits one raw 32-bit instruction word with .inst: base opcode 0xce408000,
 * rd symbol in the low bits, rn shifted left by 5, imm2 shifted left by 12
 * and rm shifted left by 16. imm2 is a plain expression, not a register; the
 * callers in this file pass 0..3.
 * Register operands are resolved through .L-prefixed symbols that are
 * expected to hold the register number - definition not visible here,
 * TODO confirm.
 */
28 .macro sm3tt1a, rd, rn, rm, imm2
29 .inst 0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
/*
 * sm3tt1b rd, rn, rm, imm2
 *
 * Emits one raw 32-bit instruction word with .inst: base opcode 0xce408400,
 * rd symbol in the low bits, rn shifted left by 5, imm2 shifted left by 12
 * and rm shifted left by 16. imm2 is a plain expression, not a register.
 * Register operands are resolved through .L-prefixed symbols that are
 * expected to hold the register number - definition not visible here,
 * TODO confirm.
 */
32 .macro sm3tt1b, rd, rn, rm, imm2
33 .inst 0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
/*
 * sm3tt2a rd, rn, rm, imm2
 *
 * Emits one raw 32-bit instruction word with .inst: base opcode 0xce408800,
 * rd symbol in the low bits, rn shifted left by 5, imm2 shifted left by 12
 * and rm shifted left by 16. imm2 is a plain expression, not a register.
 * Register operands are resolved through .L-prefixed symbols that are
 * expected to hold the register number - definition not visible here,
 * TODO confirm.
 */
36 .macro sm3tt2a, rd, rn, rm, imm2
37 .inst 0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
/*
 * sm3tt2b rd, rn, rm, imm2
 *
 * Emits one raw 32-bit instruction word with .inst: base opcode 0xce408c00,
 * rd symbol in the low bits, rn shifted left by 5, imm2 shifted left by 12
 * and rm shifted left by 16. imm2 is a plain expression, not a register.
 * Register operands are resolved through .L-prefixed symbols that are
 * expected to hold the register number - definition not visible here,
 * TODO confirm.
 */
40 .macro sm3tt2b, rd, rn, rm, imm2
41 .inst 0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
/*
 * round ab, s0, t0, t1, i
 *
 * Per-round helper built from the sm3ss1 / sm3tt1x / sm3tt2x macros.
 *   ab - suffix (a or b) pasted onto sm3tt1 / sm3tt2 to pick the variant
 *   s0 - vector register passed as the third operand of sm3tt2x
 *   t0 - vector register passed as the third operand of sm3ss1
 *   t1 - vector register that receives t0 with every 32-bit lane rotated
 *        left by one bit
 *   i  - value passed as the imm2 operand of sm3tt1x / sm3tt2x
 * Reads v8, v9, v10 and t0; writes v5, v8, v9 and t1.
 * s0, t0 and t1 are given as bare register names (for example v11); the
 * macro appends the .4s arrangement itself.
 */
44 .macro round, ab, s0, t0, t1, i
45 sm3ss1 v5.4s, v8.4s, \t0\().4s, v9.4s
/* shl #1 then sri #31 (shift right and insert) gives t1 = t0 rotated left by 1 */
46 shl \t1\().4s, \t0\().4s, #1
47 sri \t1\().4s, \t0\().4s, #31
/* both updates consume the v5 value produced by sm3ss1 above */
48 sm3tt1\ab v8.4s, v5.4s, v10.4s, \i
49 sm3tt2\ab v9.4s, v5.4s, \s0\().4s, \i
/*
 * qround ab, s0, s1, s2, s3, s4
 *
 * Issues four round invocations (imm2 values 0..3) that all use s0, and
 * derives a new vector in s4 from s0..s3 via sm3partw1 / sm3partw2.
 *   ab      - a or b, forwarded to round to select the tt1/tt2 variant
 *   s0..s3  - input vector registers (bare names, for example v0)
 *   s4      - output vector register, overwritten
 * On entry v11 must hold the t0 input for the first round; the caller sets
 * it up. The rounds alternate v11 and v12 as source and destination, so each
 * round leaves the other register holding the previous value rotated left
 * by one bit per 32-bit lane.
 * Clobbers v6, v7, v10, v11, v12 and s4, plus v5, v8, v9 through round.
 * NOTE(review): presumably the s4 computation is the SM3 message expansion
 * interleaved with the rounds - confirm against the algorithm description.
 */
52 .macro qround, ab, s0, s1, s2, s3, s4
/* start building s4, and stash two shifted windows in v6 / v7 for sm3partw2 */
54 ext \s4\().16b, \s1\().16b, \s2\().16b, #12
55 ext v6.16b, \s0\().16b, \s1\().16b, #12
56 ext v7.16b, \s2\().16b, \s3\().16b, #8
57 sm3partw1 \s4\().4s, \s0\().4s, \s3\().4s
/* v10 = s0 XOR s1, consumed by the sm3tt1x step inside each round */
60 eor v10.16b, \s0\().16b, \s1\().16b
/* four rounds, ping-ponging v11 / v12 between t0 and t1 */
62 round \ab, \s0, v11, v12, 0
63 round \ab, \s0, v12, v11, 1
64 round \ab, \s0, v11, v12, 2
65 round \ab, \s0, v12, v11, 3
/* finish s4 using the windows saved in v7 / v6 before the rounds */
68 sm3partw2 \s4\().4s, v7.4s, v6.4s
73 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
/*
 * sm3_ce_transform
 *
 * In the visible code x0 is the address the 32-byte state is loaded from and
 * stored back to, and x1 is the input pointer, advanced by 64 bytes per load.
 * Working registers: v8/v9 hold the state, v0-v4 rotate through the qround
 * argument window, v5-v7 and v10-v12 are scratch used by round / qround.
 * NOTE(review): v13, v14, v15 and v16 are read below but no instruction that
 * sets them is visible in this view; likewise no branch back to label 0 and
 * no return instruction are visible. Treat this listing as incomplete and
 * confirm against the full file before relying on it.
 */
77 SYM_TYPED_FUNC_START(sm3_ce_transform)
/* load 32 bytes of state from [x0] into v8 and v9 */
79 ld1 {v8.4s-v9.4s}, [x0]
/* ext #8 of a register with itself swaps its two 64-bit halves */
82 ext v8.16b, v8.16b, v8.16b, #8
83 ext v9.16b, v9.16b, v9.16b, #8
/* label 0: fetch 64 bytes of input into v0-v3 and post-increment x1 by 64 */
89 0: ld1 {v0.16b-v3.16b}, [x1], #64
/*
 * rev32 on .16b reverses the byte order inside every 32-bit word.
 * CPU_LE() is not defined in this file; presumably it comes from
 * asm/assembler.h and emits its argument only on little-endian builds -
 * confirm.
 */
95 CPU_LE( rev32 v0.16b, v0.16b )
96 CPU_LE( rev32 v1.16b, v1.16b )
97 CPU_LE( rev32 v2.16b, v2.16b )
98 CPU_LE( rev32 v3.16b, v3.16b )
/* v11 = v13 rotated by 4 bytes; v11 is the t0 input of the first round */
100 ext v11.16b, v13.16b, v13.16b, #4
/* variant a: each call shifts the v0-v4 window along by one register */
102 qround a, v0, v1, v2, v3, v4
103 qround a, v1, v2, v3, v4, v0
104 qround a, v2, v3, v4, v0, v1
105 qround a, v3, v4, v0, v1, v2
/* re-seed v11 from v14 before switching to variant b */
107 ext v11.16b, v14.16b, v14.16b, #4
/* variant b: same rotating register window as above */
109 qround b, v4, v0, v1, v2, v3
110 qround b, v0, v1, v2, v3, v4
111 qround b, v1, v2, v3, v4, v0
112 qround b, v2, v3, v4, v0, v1
113 qround b, v3, v4, v0, v1, v2
114 qround b, v4, v0, v1, v2, v3
115 qround b, v0, v1, v2, v3, v4
116 qround b, v1, v2, v3, v4, v0
117 qround b, v2, v3, v4, v0, v1
/* fold v15 / v16 into the state with XOR (their origin is not visible here) */
122 eor v8.16b, v8.16b, v15.16b
123 eor v9.16b, v9.16b, v16.16b
125 /* handled all input blocks? */
/* swap the 64-bit halves again, mirroring the load path, then store to [x0] */
131 ext v8.16b, v8.16b, v8.16b, #8
132 ext v9.16b, v9.16b, v9.16b, #8
133 st1 {v8.4s-v9.4s}, [x0]
135 SYM_FUNC_END(sm3_ce_transform)
/*
 * Read-only data section holding two 32-bit words under the local label .Lt.
 * NOTE(review): no visible line references .Lt. Presumably these are the SM3
 * round-constant seeds that end up in v13 / v14 - confirm against the full
 * file and the SM3 specification.
 */
137 .section ".rodata", "a"
139 .Lt: .word 0x79cc4519, 0x9d8a7a87