@ src/third_party/boringssl/linux-arm/crypto/sha/sha512-armv4.S
@ (platform/framework/web/crosswalk.git, upstream version 9.38.198.0)
#if defined(__arm__)
#include "arm_arch.h"
#ifdef __ARMEL__
# define LO 0
# define HI 4
# define WORD64(hi0,lo0,hi1,lo1)        .word   lo0,hi0, lo1,hi1
#else
# define HI 0
# define LO 4
# define WORD64(hi0,lo0,hi1,lo1)        .word   hi0,lo0, hi1,lo1
#endif
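@ Note: SHA-512 operates on 64-bit words, but this code targets 32-bit ARM,
@ so every 64-bit quantity is handled as a pair of 32-bit halves.  WORD64()
@ lays each constant out in host byte order, and the LO/HI offsets defined
@ above select the low/high 32-bit half of a 64-bit value in memory.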

.text
.code   32
.type   K512,%object
.align  5
K512:
WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size   K512,.-K512
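@ K512 above is the table of the 80 SHA-512 round constants from FIPS 180-4
@ (40 WORD64 entries, two constants per line).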
.LOPENSSL_armcap:
.word   OPENSSL_armcap_P-sha512_block_data_order
.skip   32-4
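@ .LOPENSSL_armcap holds the offset from sha512_block_data_order to the
@ OPENSSL_armcap_P capability flags; the prologue adds it to the function's
@ own address (taken from pc) to read the flags position-independently.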

.global sha512_block_data_order
.hidden sha512_block_data_order
.type   sha512_block_data_order,%function
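@ Calling convention (descriptive names only, not part of the source):
@   r0 -> hash state, eight 64-bit words h0..h7
@   r1 -> input data
@   r2  = number of 128-byte blocks; converted below into an end pointer.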
sha512_block_data_order:
        sub     r3,pc,#8                @ sha512_block_data_order
        add     r2,r1,r2,lsl#7  @ len to point at the end of inp
#if __ARM_ARCH__>=7
        ldr     r12,.LOPENSSL_armcap
        ldr     r12,[r3,r12]            @ OPENSSL_armcap_P
        tst     r12,#1
        bne     .LNEON
#endif
        stmdb   sp!,{r4-r12,lr}
        sub     r14,r3,#672             @ K512
        sub     sp,sp,#9*8
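@ Stack frame (a sketch, inferred from the offsets used below): the working
@ variables a..h live at sp+#0..sp+#56 as lo/hi word pairs and the current
@ message word X[i] is stored at sp+#64.  Every round slides sp down by 8,
@ so after 80 rounds sp has moved 80*8=640 bytes, which the epilogue undoes
@ with "add sp,sp,#640".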

        ldr     r7,[r0,#32+LO]
        ldr     r8,[r0,#32+HI]
        ldr     r9, [r0,#48+LO]
        ldr     r10, [r0,#48+HI]
        ldr     r11, [r0,#56+LO]
        ldr     r12, [r0,#56+HI]
.Loop:
        str     r9, [sp,#48+0]
        str     r10, [sp,#48+4]
        str     r11, [sp,#56+0]
        str     r12, [sp,#56+4]
        ldr     r5,[r0,#0+LO]
        ldr     r6,[r0,#0+HI]
        ldr     r3,[r0,#8+LO]
        ldr     r4,[r0,#8+HI]
        ldr     r9, [r0,#16+LO]
        ldr     r10, [r0,#16+HI]
        ldr     r11, [r0,#24+LO]
        ldr     r12, [r0,#24+HI]
        str     r3,[sp,#8+0]
        str     r4,[sp,#8+4]
        str     r9, [sp,#16+0]
        str     r10, [sp,#16+4]
        str     r11, [sp,#24+0]
        str     r12, [sp,#24+4]
        ldr     r3,[r0,#40+LO]
        ldr     r4,[r0,#40+HI]
        str     r3,[sp,#40+0]
        str     r4,[sp,#40+4]

.L00_15:
#if __ARM_ARCH__<7
        ldrb    r3,[r1,#7]
        ldrb    r9, [r1,#6]
        ldrb    r10, [r1,#5]
        ldrb    r11, [r1,#4]
        ldrb    r4,[r1,#3]
        ldrb    r12, [r1,#2]
        orr     r3,r3,r9,lsl#8
        ldrb    r9, [r1,#1]
        orr     r3,r3,r10,lsl#16
        ldrb    r10, [r1],#8
        orr     r3,r3,r11,lsl#24
        orr     r4,r4,r12,lsl#8
        orr     r4,r4,r9,lsl#16
        orr     r4,r4,r10,lsl#24
#else
        ldr     r3,[r1,#4]
        ldr     r4,[r1],#8
#ifdef __ARMEL__
        rev     r3,r3
        rev     r4,r4
#endif
#endif
        @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
        @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
        @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
        mov     r9,r7,lsr#14
        str     r3,[sp,#64+0]
        mov     r10,r8,lsr#14
        str     r4,[sp,#64+4]
        eor     r9,r9,r8,lsl#18
        ldr     r11,[sp,#56+0]  @ h.lo
        eor     r10,r10,r7,lsl#18
        ldr     r12,[sp,#56+4]  @ h.hi
        eor     r9,r9,r7,lsr#18
        eor     r10,r10,r8,lsr#18
        eor     r9,r9,r8,lsl#14
        eor     r10,r10,r7,lsl#14
        eor     r9,r9,r8,lsr#9
        eor     r10,r10,r7,lsr#9
        eor     r9,r9,r7,lsl#23
        eor     r10,r10,r8,lsl#23       @ Sigma1(e)
        adds    r3,r3,r9
        ldr     r9,[sp,#40+0]   @ f.lo
        adc     r4,r4,r10               @ T += Sigma1(e)
        ldr     r10,[sp,#40+4]  @ f.hi
        adds    r3,r3,r11
        ldr     r11,[sp,#48+0]  @ g.lo
        adc     r4,r4,r12               @ T += h
        ldr     r12,[sp,#48+4]  @ g.hi

        eor     r9,r9,r11
        str     r7,[sp,#32+0]
        eor     r10,r10,r12
        str     r8,[sp,#32+4]
        and     r9,r9,r7
        str     r5,[sp,#0+0]
        and     r10,r10,r8
        str     r6,[sp,#0+4]
        eor     r9,r9,r11
        ldr     r11,[r14,#LO]   @ K[i].lo
        eor     r10,r10,r12             @ Ch(e,f,g)
        ldr     r12,[r14,#HI]   @ K[i].hi

        adds    r3,r3,r9
        ldr     r7,[sp,#24+0]   @ d.lo
        adc     r4,r4,r10               @ T += Ch(e,f,g)
        ldr     r8,[sp,#24+4]   @ d.hi
        adds    r3,r3,r11
        and     r9,r11,#0xff
        adc     r4,r4,r12               @ T += K[i]
        adds    r7,r7,r3
        ldr     r11,[sp,#8+0]   @ b.lo
        adc     r8,r8,r4                @ d += T
        teq     r9,#148

        ldr     r12,[sp,#16+0]  @ c.lo
        orreq   r14,r14,#1
        @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
        @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
        @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
        mov     r9,r5,lsr#28
        mov     r10,r6,lsr#28
        eor     r9,r9,r6,lsl#4
        eor     r10,r10,r5,lsl#4
        eor     r9,r9,r6,lsr#2
        eor     r10,r10,r5,lsr#2
        eor     r9,r9,r5,lsl#30
        eor     r10,r10,r6,lsl#30
        eor     r9,r9,r6,lsr#7
        eor     r10,r10,r5,lsr#7
        eor     r9,r9,r5,lsl#25
        eor     r10,r10,r6,lsl#25       @ Sigma0(a)
        adds    r3,r3,r9
        and     r9,r5,r11
        adc     r4,r4,r10               @ T += Sigma0(a)

        ldr     r10,[sp,#8+4]   @ b.hi
        orr     r5,r5,r11
        ldr     r11,[sp,#16+4]  @ c.hi
        and     r5,r5,r12
        and     r12,r6,r10
        orr     r6,r6,r10
        orr     r5,r5,r9                @ Maj(a,b,c).lo
        and     r6,r6,r11
        adds    r5,r5,r3
        orr     r6,r6,r12               @ Maj(a,b,c).hi
        sub     sp,sp,#8
        adc     r6,r6,r4                @ h += T
        tst     r14,#1
        add     r14,r14,#8
        tst     r14,#1
        beq     .L00_15
        ldr     r9,[sp,#184+0]
        ldr     r10,[sp,#184+4]
        bic     r14,r14,#1
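@ Rounds 16..79: the message schedule is extended on the fly.  Relative to
@ the already-slid sp, [sp,#184] is X[i-15] (fed to sigma0), [sp,#80] is
@ X[i-2] (fed to sigma1), and [sp,#120]/[sp,#192] are X[i-7]/X[i-16].  Bit 0
@ of r14 (the K512 pointer) doubles as the loop-exit flag; it is set when
@ the low byte of K[i] matches the last constant of the range.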
.L16_79:
        @ sigma0(x)     (ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
        @ LO            lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
        @ HI            hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
        mov     r3,r9,lsr#1
        ldr     r11,[sp,#80+0]
        mov     r4,r10,lsr#1
        ldr     r12,[sp,#80+4]
        eor     r3,r3,r10,lsl#31
        eor     r4,r4,r9,lsl#31
        eor     r3,r3,r9,lsr#8
        eor     r4,r4,r10,lsr#8
        eor     r3,r3,r10,lsl#24
        eor     r4,r4,r9,lsl#24
        eor     r3,r3,r9,lsr#7
        eor     r4,r4,r10,lsr#7
        eor     r3,r3,r10,lsl#25

        @ sigma1(x)     (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
        @ LO            lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
        @ HI            hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
        mov     r9,r11,lsr#19
        mov     r10,r12,lsr#19
        eor     r9,r9,r12,lsl#13
        eor     r10,r10,r11,lsl#13
        eor     r9,r9,r12,lsr#29
        eor     r10,r10,r11,lsr#29
        eor     r9,r9,r11,lsl#3
        eor     r10,r10,r12,lsl#3
        eor     r9,r9,r11,lsr#6
        eor     r10,r10,r12,lsr#6
        ldr     r11,[sp,#120+0]
        eor     r9,r9,r12,lsl#26

        ldr     r12,[sp,#120+4]
        adds    r3,r3,r9
        ldr     r9,[sp,#192+0]
        adc     r4,r4,r10

        ldr     r10,[sp,#192+4]
        adds    r3,r3,r11
        adc     r4,r4,r12
        adds    r3,r3,r9
        adc     r4,r4,r10
        @ Sigma1(x)     (ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
        @ LO            lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
        @ HI            hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
        mov     r9,r7,lsr#14
        str     r3,[sp,#64+0]
        mov     r10,r8,lsr#14
        str     r4,[sp,#64+4]
        eor     r9,r9,r8,lsl#18
        ldr     r11,[sp,#56+0]  @ h.lo
        eor     r10,r10,r7,lsl#18
        ldr     r12,[sp,#56+4]  @ h.hi
        eor     r9,r9,r7,lsr#18
        eor     r10,r10,r8,lsr#18
        eor     r9,r9,r8,lsl#14
        eor     r10,r10,r7,lsl#14
        eor     r9,r9,r8,lsr#9
        eor     r10,r10,r7,lsr#9
        eor     r9,r9,r7,lsl#23
        eor     r10,r10,r8,lsl#23       @ Sigma1(e)
        adds    r3,r3,r9
        ldr     r9,[sp,#40+0]   @ f.lo
        adc     r4,r4,r10               @ T += Sigma1(e)
        ldr     r10,[sp,#40+4]  @ f.hi
        adds    r3,r3,r11
        ldr     r11,[sp,#48+0]  @ g.lo
        adc     r4,r4,r12               @ T += h
        ldr     r12,[sp,#48+4]  @ g.hi

        eor     r9,r9,r11
        str     r7,[sp,#32+0]
        eor     r10,r10,r12
        str     r8,[sp,#32+4]
        and     r9,r9,r7
        str     r5,[sp,#0+0]
        and     r10,r10,r8
        str     r6,[sp,#0+4]
        eor     r9,r9,r11
        ldr     r11,[r14,#LO]   @ K[i].lo
        eor     r10,r10,r12             @ Ch(e,f,g)
        ldr     r12,[r14,#HI]   @ K[i].hi

        adds    r3,r3,r9
        ldr     r7,[sp,#24+0]   @ d.lo
        adc     r4,r4,r10               @ T += Ch(e,f,g)
        ldr     r8,[sp,#24+4]   @ d.hi
        adds    r3,r3,r11
        and     r9,r11,#0xff
        adc     r4,r4,r12               @ T += K[i]
        adds    r7,r7,r3
        ldr     r11,[sp,#8+0]   @ b.lo
        adc     r8,r8,r4                @ d += T
        teq     r9,#23

        ldr     r12,[sp,#16+0]  @ c.lo
        orreq   r14,r14,#1
        @ Sigma0(x)     (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
        @ LO            lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
        @ HI            hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
        mov     r9,r5,lsr#28
        mov     r10,r6,lsr#28
        eor     r9,r9,r6,lsl#4
        eor     r10,r10,r5,lsl#4
        eor     r9,r9,r6,lsr#2
        eor     r10,r10,r5,lsr#2
        eor     r9,r9,r5,lsl#30
        eor     r10,r10,r6,lsl#30
        eor     r9,r9,r6,lsr#7
        eor     r10,r10,r5,lsr#7
        eor     r9,r9,r5,lsl#25
        eor     r10,r10,r6,lsl#25       @ Sigma0(a)
        adds    r3,r3,r9
        and     r9,r5,r11
        adc     r4,r4,r10               @ T += Sigma0(a)

        ldr     r10,[sp,#8+4]   @ b.hi
        orr     r5,r5,r11
        ldr     r11,[sp,#16+4]  @ c.hi
        and     r5,r5,r12
        and     r12,r6,r10
        orr     r6,r6,r10
        orr     r5,r5,r9                @ Maj(a,b,c).lo
        and     r6,r6,r11
        adds    r5,r5,r3
        orr     r6,r6,r12               @ Maj(a,b,c).hi
        sub     sp,sp,#8
        adc     r6,r6,r4                @ h += T
        tst     r14,#1
        add     r14,r14,#8
        ldreq   r9,[sp,#184+0]
        ldreq   r10,[sp,#184+4]
        beq     .L16_79
        bic     r14,r14,#1

        ldr     r3,[sp,#8+0]
        ldr     r4,[sp,#8+4]
        ldr     r9, [r0,#0+LO]
        ldr     r10, [r0,#0+HI]
        ldr     r11, [r0,#8+LO]
        ldr     r12, [r0,#8+HI]
        adds    r9,r5,r9
        str     r9, [r0,#0+LO]
        adc     r10,r6,r10
        str     r10, [r0,#0+HI]
        adds    r11,r3,r11
        str     r11, [r0,#8+LO]
        adc     r12,r4,r12
        str     r12, [r0,#8+HI]

        ldr     r5,[sp,#16+0]
        ldr     r6,[sp,#16+4]
        ldr     r3,[sp,#24+0]
        ldr     r4,[sp,#24+4]
        ldr     r9, [r0,#16+LO]
        ldr     r10, [r0,#16+HI]
        ldr     r11, [r0,#24+LO]
        ldr     r12, [r0,#24+HI]
        adds    r9,r5,r9
        str     r9, [r0,#16+LO]
        adc     r10,r6,r10
        str     r10, [r0,#16+HI]
        adds    r11,r3,r11
        str     r11, [r0,#24+LO]
        adc     r12,r4,r12
        str     r12, [r0,#24+HI]

        ldr     r3,[sp,#40+0]
        ldr     r4,[sp,#40+4]
        ldr     r9, [r0,#32+LO]
        ldr     r10, [r0,#32+HI]
        ldr     r11, [r0,#40+LO]
        ldr     r12, [r0,#40+HI]
        adds    r7,r7,r9
        str     r7,[r0,#32+LO]
        adc     r8,r8,r10
        str     r8,[r0,#32+HI]
        adds    r11,r3,r11
        str     r11, [r0,#40+LO]
        adc     r12,r4,r12
        str     r12, [r0,#40+HI]

        ldr     r5,[sp,#48+0]
        ldr     r6,[sp,#48+4]
        ldr     r3,[sp,#56+0]
        ldr     r4,[sp,#56+4]
        ldr     r9, [r0,#48+LO]
        ldr     r10, [r0,#48+HI]
        ldr     r11, [r0,#56+LO]
        ldr     r12, [r0,#56+HI]
        adds    r9,r5,r9
        str     r9, [r0,#48+LO]
        adc     r10,r6,r10
        str     r10, [r0,#48+HI]
        adds    r11,r3,r11
        str     r11, [r0,#56+LO]
        adc     r12,r4,r12
        str     r12, [r0,#56+HI]

        add     sp,sp,#640
        sub     r14,r14,#640

        teq     r1,r2
        bne     .Loop

        add     sp,sp,#8*9              @ destroy frame
#if __ARM_ARCH__>=5
        ldmia   sp!,{r4-r12,pc}
#else
        ldmia   sp!,{r4-r12,lr}
        tst     lr,#1
        moveq   pc,lr                   @ be binary compatible with V4, yet
        .word   0xe12fff1e                      @ interoperable with Thumb ISA:-)
#endif
#if __ARM_ARCH__>=7
.fpu    neon

.align  4
.LNEON:
        dmb                             @ errata #451034 on early Cortex A8
        vstmdb  sp!,{d8-d15}            @ ABI specification says so
        sub     r3,r3,#672              @ K512
        vldmia  r0,{d16-d23}            @ load context
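@ NEON register use (as set up by the loads above and in rounds 0..15):
@ d16-d23 hold the state a..h, q0-q7 (d0-d15) hold the 16-word message
@ schedule window, d28 is the current K[i], and d24-d26/d29/d30 are scratch
@ for the Sigma, Ch and Maj terms.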
.Loop_neon:
        vshr.u64        d24,d20,#14     @ 0
#if 0<16
        vld1.64         {d0},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d20,#18
#if 0>0
         vadd.i64       d16,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d20,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d20,#50
        vsli.64         d25,d20,#46
        vmov            d29,d20
        vsli.64         d26,d20,#23
#if 0<16 && defined(__ARMEL__)
        vrev64.8        d0,d0
#endif
        veor            d25,d24
        vbsl            d29,d21,d22             @ Ch(e,f,g)
        vshr.u64        d24,d16,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d23
        vshr.u64        d25,d16,#34
        vsli.64         d24,d16,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d16,#39
        vadd.i64        d28,d0
        vsli.64         d25,d16,#30
        veor            d30,d16,d17
        vsli.64         d26,d16,#25
        veor            d23,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d18,d17             @ Maj(a,b,c)
        veor            d23,d26                 @ Sigma0(a)
        vadd.i64        d19,d27
        vadd.i64        d30,d27
        @ vadd.i64      d23,d30
        vshr.u64        d24,d19,#14     @ 1
#if 1<16
        vld1.64         {d1},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d19,#18
#if 1>0
         vadd.i64       d23,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d19,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d19,#50
        vsli.64         d25,d19,#46
        vmov            d29,d19
        vsli.64         d26,d19,#23
#if 1<16 && defined(__ARMEL__)
        vrev64.8        d1,d1
#endif
        veor            d25,d24
        vbsl            d29,d20,d21             @ Ch(e,f,g)
        vshr.u64        d24,d23,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d22
        vshr.u64        d25,d23,#34
        vsli.64         d24,d23,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d23,#39
        vadd.i64        d28,d1
        vsli.64         d25,d23,#30
        veor            d30,d23,d16
        vsli.64         d26,d23,#25
        veor            d22,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d17,d16             @ Maj(a,b,c)
        veor            d22,d26                 @ Sigma0(a)
        vadd.i64        d18,d27
        vadd.i64        d30,d27
        @ vadd.i64      d22,d30
        vshr.u64        d24,d18,#14     @ 2
#if 2<16
        vld1.64         {d2},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d18,#18
#if 2>0
         vadd.i64       d22,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d18,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d18,#50
        vsli.64         d25,d18,#46
        vmov            d29,d18
        vsli.64         d26,d18,#23
#if 2<16 && defined(__ARMEL__)
        vrev64.8        d2,d2
#endif
        veor            d25,d24
        vbsl            d29,d19,d20             @ Ch(e,f,g)
        vshr.u64        d24,d22,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d21
        vshr.u64        d25,d22,#34
        vsli.64         d24,d22,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d22,#39
        vadd.i64        d28,d2
        vsli.64         d25,d22,#30
        veor            d30,d22,d23
        vsli.64         d26,d22,#25
        veor            d21,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d16,d23             @ Maj(a,b,c)
        veor            d21,d26                 @ Sigma0(a)
        vadd.i64        d17,d27
        vadd.i64        d30,d27
        @ vadd.i64      d21,d30
        vshr.u64        d24,d17,#14     @ 3
#if 3<16
        vld1.64         {d3},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d17,#18
#if 3>0
         vadd.i64       d21,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d17,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d17,#50
        vsli.64         d25,d17,#46
        vmov            d29,d17
        vsli.64         d26,d17,#23
#if 3<16 && defined(__ARMEL__)
        vrev64.8        d3,d3
#endif
        veor            d25,d24
        vbsl            d29,d18,d19             @ Ch(e,f,g)
        vshr.u64        d24,d21,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d20
        vshr.u64        d25,d21,#34
        vsli.64         d24,d21,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d21,#39
        vadd.i64        d28,d3
        vsli.64         d25,d21,#30
        veor            d30,d21,d22
        vsli.64         d26,d21,#25
        veor            d20,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d23,d22             @ Maj(a,b,c)
        veor            d20,d26                 @ Sigma0(a)
        vadd.i64        d16,d27
        vadd.i64        d30,d27
        @ vadd.i64      d20,d30
        vshr.u64        d24,d16,#14     @ 4
#if 4<16
        vld1.64         {d4},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d16,#18
#if 4>0
         vadd.i64       d20,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d16,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d16,#50
        vsli.64         d25,d16,#46
        vmov            d29,d16
        vsli.64         d26,d16,#23
#if 4<16 && defined(__ARMEL__)
        vrev64.8        d4,d4
#endif
        veor            d25,d24
        vbsl            d29,d17,d18             @ Ch(e,f,g)
        vshr.u64        d24,d20,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d19
        vshr.u64        d25,d20,#34
        vsli.64         d24,d20,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d20,#39
        vadd.i64        d28,d4
        vsli.64         d25,d20,#30
        veor            d30,d20,d21
        vsli.64         d26,d20,#25
        veor            d19,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d22,d21             @ Maj(a,b,c)
        veor            d19,d26                 @ Sigma0(a)
        vadd.i64        d23,d27
        vadd.i64        d30,d27
        @ vadd.i64      d19,d30
        vshr.u64        d24,d23,#14     @ 5
#if 5<16
        vld1.64         {d5},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d23,#18
#if 5>0
         vadd.i64       d19,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d23,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d23,#50
        vsli.64         d25,d23,#46
        vmov            d29,d23
        vsli.64         d26,d23,#23
#if 5<16 && defined(__ARMEL__)
        vrev64.8        d5,d5
#endif
        veor            d25,d24
        vbsl            d29,d16,d17             @ Ch(e,f,g)
        vshr.u64        d24,d19,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d18
        vshr.u64        d25,d19,#34
        vsli.64         d24,d19,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d19,#39
        vadd.i64        d28,d5
        vsli.64         d25,d19,#30
        veor            d30,d19,d20
        vsli.64         d26,d19,#25
        veor            d18,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d21,d20             @ Maj(a,b,c)
        veor            d18,d26                 @ Sigma0(a)
        vadd.i64        d22,d27
        vadd.i64        d30,d27
        @ vadd.i64      d18,d30
        vshr.u64        d24,d22,#14     @ 6
#if 6<16
        vld1.64         {d6},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d22,#18
#if 6>0
         vadd.i64       d18,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d22,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d22,#50
        vsli.64         d25,d22,#46
        vmov            d29,d22
        vsli.64         d26,d22,#23
#if 6<16 && defined(__ARMEL__)
        vrev64.8        d6,d6
#endif
        veor            d25,d24
        vbsl            d29,d23,d16             @ Ch(e,f,g)
        vshr.u64        d24,d18,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d17
        vshr.u64        d25,d18,#34
        vsli.64         d24,d18,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d18,#39
        vadd.i64        d28,d6
        vsli.64         d25,d18,#30
        veor            d30,d18,d19
        vsli.64         d26,d18,#25
        veor            d17,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d20,d19             @ Maj(a,b,c)
        veor            d17,d26                 @ Sigma0(a)
        vadd.i64        d21,d27
        vadd.i64        d30,d27
        @ vadd.i64      d17,d30
        vshr.u64        d24,d21,#14     @ 7
#if 7<16
        vld1.64         {d7},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d21,#18
#if 7>0
         vadd.i64       d17,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d21,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d21,#50
        vsli.64         d25,d21,#46
        vmov            d29,d21
        vsli.64         d26,d21,#23
#if 7<16 && defined(__ARMEL__)
        vrev64.8        d7,d7
#endif
        veor            d25,d24
        vbsl            d29,d22,d23             @ Ch(e,f,g)
        vshr.u64        d24,d17,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d16
        vshr.u64        d25,d17,#34
        vsli.64         d24,d17,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d17,#39
        vadd.i64        d28,d7
        vsli.64         d25,d17,#30
        veor            d30,d17,d18
        vsli.64         d26,d17,#25
        veor            d16,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d19,d18             @ Maj(a,b,c)
        veor            d16,d26                 @ Sigma0(a)
        vadd.i64        d20,d27
        vadd.i64        d30,d27
        @ vadd.i64      d16,d30
        vshr.u64        d24,d20,#14     @ 8
#if 8<16
        vld1.64         {d8},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d20,#18
#if 8>0
         vadd.i64       d16,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d20,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d20,#50
        vsli.64         d25,d20,#46
        vmov            d29,d20
        vsli.64         d26,d20,#23
#if 8<16 && defined(__ARMEL__)
        vrev64.8        d8,d8
#endif
        veor            d25,d24
        vbsl            d29,d21,d22             @ Ch(e,f,g)
        vshr.u64        d24,d16,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d23
        vshr.u64        d25,d16,#34
        vsli.64         d24,d16,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d16,#39
        vadd.i64        d28,d8
        vsli.64         d25,d16,#30
        veor            d30,d16,d17
        vsli.64         d26,d16,#25
        veor            d23,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d18,d17             @ Maj(a,b,c)
        veor            d23,d26                 @ Sigma0(a)
        vadd.i64        d19,d27
        vadd.i64        d30,d27
        @ vadd.i64      d23,d30
        vshr.u64        d24,d19,#14     @ 9
#if 9<16
        vld1.64         {d9},[r1]!      @ handles unaligned
#endif
        vshr.u64        d25,d19,#18
#if 9>0
         vadd.i64       d23,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d19,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d19,#50
        vsli.64         d25,d19,#46
        vmov            d29,d19
        vsli.64         d26,d19,#23
#if 9<16 && defined(__ARMEL__)
        vrev64.8        d9,d9
#endif
        veor            d25,d24
        vbsl            d29,d20,d21             @ Ch(e,f,g)
        vshr.u64        d24,d23,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d22
        vshr.u64        d25,d23,#34
        vsli.64         d24,d23,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d23,#39
        vadd.i64        d28,d9
        vsli.64         d25,d23,#30
        veor            d30,d23,d16
        vsli.64         d26,d23,#25
        veor            d22,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d17,d16             @ Maj(a,b,c)
        veor            d22,d26                 @ Sigma0(a)
        vadd.i64        d18,d27
        vadd.i64        d30,d27
        @ vadd.i64      d22,d30
        vshr.u64        d24,d18,#14     @ 10
#if 10<16
        vld1.64         {d10},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d18,#18
#if 10>0
         vadd.i64       d22,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d18,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d18,#50
        vsli.64         d25,d18,#46
        vmov            d29,d18
        vsli.64         d26,d18,#23
#if 10<16 && defined(__ARMEL__)
        vrev64.8        d10,d10
#endif
        veor            d25,d24
        vbsl            d29,d19,d20             @ Ch(e,f,g)
        vshr.u64        d24,d22,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d21
        vshr.u64        d25,d22,#34
        vsli.64         d24,d22,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d22,#39
        vadd.i64        d28,d10
        vsli.64         d25,d22,#30
        veor            d30,d22,d23
        vsli.64         d26,d22,#25
        veor            d21,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d16,d23             @ Maj(a,b,c)
        veor            d21,d26                 @ Sigma0(a)
        vadd.i64        d17,d27
        vadd.i64        d30,d27
        @ vadd.i64      d21,d30
        vshr.u64        d24,d17,#14     @ 11
#if 11<16
        vld1.64         {d11},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d17,#18
#if 11>0
         vadd.i64       d21,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d17,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d17,#50
        vsli.64         d25,d17,#46
        vmov            d29,d17
        vsli.64         d26,d17,#23
#if 11<16 && defined(__ARMEL__)
        vrev64.8        d11,d11
#endif
        veor            d25,d24
        vbsl            d29,d18,d19             @ Ch(e,f,g)
        vshr.u64        d24,d21,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d20
        vshr.u64        d25,d21,#34
        vsli.64         d24,d21,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d21,#39
        vadd.i64        d28,d11
        vsli.64         d25,d21,#30
        veor            d30,d21,d22
        vsli.64         d26,d21,#25
        veor            d20,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d23,d22             @ Maj(a,b,c)
        veor            d20,d26                 @ Sigma0(a)
        vadd.i64        d16,d27
        vadd.i64        d30,d27
        @ vadd.i64      d20,d30
        vshr.u64        d24,d16,#14     @ 12
#if 12<16
        vld1.64         {d12},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d16,#18
#if 12>0
         vadd.i64       d20,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d16,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d16,#50
        vsli.64         d25,d16,#46
        vmov            d29,d16
        vsli.64         d26,d16,#23
#if 12<16 && defined(__ARMEL__)
        vrev64.8        d12,d12
#endif
        veor            d25,d24
        vbsl            d29,d17,d18             @ Ch(e,f,g)
        vshr.u64        d24,d20,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d19
        vshr.u64        d25,d20,#34
        vsli.64         d24,d20,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d20,#39
        vadd.i64        d28,d12
        vsli.64         d25,d20,#30
        veor            d30,d20,d21
        vsli.64         d26,d20,#25
        veor            d19,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d22,d21             @ Maj(a,b,c)
        veor            d19,d26                 @ Sigma0(a)
        vadd.i64        d23,d27
        vadd.i64        d30,d27
        @ vadd.i64      d19,d30
        vshr.u64        d24,d23,#14     @ 13
#if 13<16
        vld1.64         {d13},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d23,#18
#if 13>0
         vadd.i64       d19,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d23,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d23,#50
        vsli.64         d25,d23,#46
        vmov            d29,d23
        vsli.64         d26,d23,#23
#if 13<16 && defined(__ARMEL__)
        vrev64.8        d13,d13
#endif
        veor            d25,d24
        vbsl            d29,d16,d17             @ Ch(e,f,g)
        vshr.u64        d24,d19,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d18
        vshr.u64        d25,d19,#34
        vsli.64         d24,d19,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d19,#39
        vadd.i64        d28,d13
        vsli.64         d25,d19,#30
        veor            d30,d19,d20
        vsli.64         d26,d19,#25
        veor            d18,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d21,d20             @ Maj(a,b,c)
        veor            d18,d26                 @ Sigma0(a)
        vadd.i64        d22,d27
        vadd.i64        d30,d27
        @ vadd.i64      d18,d30
        vshr.u64        d24,d22,#14     @ 14
#if 14<16
        vld1.64         {d14},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d22,#18
#if 14>0
         vadd.i64       d18,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d22,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d22,#50
        vsli.64         d25,d22,#46
        vmov            d29,d22
        vsli.64         d26,d22,#23
#if 14<16 && defined(__ARMEL__)
        vrev64.8        d14,d14
#endif
        veor            d25,d24
        vbsl            d29,d23,d16             @ Ch(e,f,g)
        vshr.u64        d24,d18,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d17
        vshr.u64        d25,d18,#34
        vsli.64         d24,d18,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d18,#39
        vadd.i64        d28,d14
        vsli.64         d25,d18,#30
        veor            d30,d18,d19
        vsli.64         d26,d18,#25
        veor            d17,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d20,d19             @ Maj(a,b,c)
        veor            d17,d26                 @ Sigma0(a)
        vadd.i64        d21,d27
        vadd.i64        d30,d27
        @ vadd.i64      d17,d30
        vshr.u64        d24,d21,#14     @ 15
#if 15<16
        vld1.64         {d15},[r1]!     @ handles unaligned
#endif
        vshr.u64        d25,d21,#18
#if 15>0
         vadd.i64       d17,d30                 @ h+=Maj from the past
#endif
        vshr.u64        d26,d21,#41
        vld1.64         {d28},[r3,:64]! @ K[i++]
        vsli.64         d24,d21,#50
        vsli.64         d25,d21,#46
        vmov            d29,d21
        vsli.64         d26,d21,#23
#if 15<16 && defined(__ARMEL__)
        vrev64.8        d15,d15
#endif
        veor            d25,d24
        vbsl            d29,d22,d23             @ Ch(e,f,g)
        vshr.u64        d24,d17,#28
        veor            d26,d25                 @ Sigma1(e)
        vadd.i64        d27,d29,d16
        vshr.u64        d25,d17,#34
        vsli.64         d24,d17,#36
        vadd.i64        d27,d26
        vshr.u64        d26,d17,#39
        vadd.i64        d28,d15
        vsli.64         d25,d17,#30
        veor            d30,d17,d18
        vsli.64         d26,d17,#25
        veor            d16,d24,d25
        vadd.i64        d27,d28
        vbsl            d30,d19,d18             @ Maj(a,b,c)
        veor            d16,d26                 @ Sigma0(a)
        vadd.i64        d20,d27
        vadd.i64        d30,d27
        @ vadd.i64      d16,d30
        mov             r12,#4
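@ NEON rounds 16..79: four passes of the 16-round body below, counted down
@ in r12.  Each pass extends the schedule two words at a time (one q
@ register) using vext/vshr/vsli to form sigma0(X[i+1]) and sigma1(X[i+14]).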
1045 .L16_79_neon:
1046         subs            r12,#1
1047         vshr.u64        q12,q7,#19
1048         vshr.u64        q13,q7,#61
1049          vadd.i64       d16,d30                 @ h+=Maj from the past
1050         vshr.u64        q15,q7,#6
1051         vsli.64         q12,q7,#45
1052         vext.8          q14,q0,q1,#8    @ X[i+1]
1053         vsli.64         q13,q7,#3
1054         veor            q15,q12
1055         vshr.u64        q12,q14,#1
1056         veor            q15,q13                         @ sigma1(X[i+14])
1057         vshr.u64        q13,q14,#8
1058         vadd.i64        q0,q15
1059         vshr.u64        q15,q14,#7
1060         vsli.64         q12,q14,#63
1061         vsli.64         q13,q14,#56
1062         vext.8          q14,q4,q5,#8    @ X[i+9]
1063         veor            q15,q12
1064         vshr.u64        d24,d20,#14             @ from NEON_00_15
1065         vadd.i64        q0,q14
1066         vshr.u64        d25,d20,#18             @ from NEON_00_15
1067         veor            q15,q13                         @ sigma0(X[i+1])
1068         vshr.u64        d26,d20,#41             @ from NEON_00_15
1069         vadd.i64        q0,q15
1070         vld1.64         {d28},[r3,:64]! @ K[i++]
1071         vsli.64         d24,d20,#50
1072         vsli.64         d25,d20,#46
1073         vmov            d29,d20
1074         vsli.64         d26,d20,#23
1075 #if 16<16 && defined(__ARMEL__)
1076         vrev64.8        ,
1077 #endif
1078         veor            d25,d24
1079         vbsl            d29,d21,d22             @ Ch(e,f,g)
1080         vshr.u64        d24,d16,#28
1081         veor            d26,d25                 @ Sigma1(e)
1082         vadd.i64        d27,d29,d23
1083         vshr.u64        d25,d16,#34
1084         vsli.64         d24,d16,#36
1085         vadd.i64        d27,d26
1086         vshr.u64        d26,d16,#39
1087         vadd.i64        d28,d0
1088         vsli.64         d25,d16,#30
1089         veor            d30,d16,d17
1090         vsli.64         d26,d16,#25
1091         veor            d23,d24,d25
1092         vadd.i64        d27,d28
1093         vbsl            d30,d18,d17             @ Maj(a,b,c)
1094         veor            d23,d26                 @ Sigma0(a)
1095         vadd.i64        d19,d27
1096         vadd.i64        d30,d27
1097         @ vadd.i64      d23,d30
1098         vshr.u64        d24,d19,#14     @ 17
1099 #if 17<16
1100         vld1.64         {d1},[r1]!      @ handles unaligned
1101 #endif
1102         vshr.u64        d25,d19,#18
1103 #if 17>0
1104          vadd.i64       d23,d30                 @ h+=Maj from the past
1105 #endif
1106         vshr.u64        d26,d19,#41
1107         vld1.64         {d28},[r3,:64]! @ K[i++]
1108         vsli.64         d24,d19,#50
1109         vsli.64         d25,d19,#46
1110         vmov            d29,d19
1111         vsli.64         d26,d19,#23
1112 #if 17<16 && defined(__ARMEL__)
1113         vrev64.8        ,
1114 #endif
1115         veor            d25,d24
1116         vbsl            d29,d20,d21             @ Ch(e,f,g)
1117         vshr.u64        d24,d23,#28
1118         veor            d26,d25                 @ Sigma1(e)
1119         vadd.i64        d27,d29,d22
1120         vshr.u64        d25,d23,#34
1121         vsli.64         d24,d23,#36
1122         vadd.i64        d27,d26
1123         vshr.u64        d26,d23,#39
1124         vadd.i64        d28,d1
1125         vsli.64         d25,d23,#30
1126         veor            d30,d23,d16
1127         vsli.64         d26,d23,#25
1128         veor            d22,d24,d25
1129         vadd.i64        d27,d28
1130         vbsl            d30,d17,d16             @ Maj(a,b,c)
1131         veor            d22,d26                 @ Sigma0(a)
1132         vadd.i64        d18,d27
1133         vadd.i64        d30,d27
1134         @ vadd.i64      d22,d30
1135         vshr.u64        q12,q0,#19
1136         vshr.u64        q13,q0,#61
1137          vadd.i64       d22,d30                 @ h+=Maj from the past
1138         vshr.u64        q15,q0,#6
1139         vsli.64         q12,q0,#45
1140         vext.8          q14,q1,q2,#8    @ X[i+1]
1141         vsli.64         q13,q0,#3
1142         veor            q15,q12
1143         vshr.u64        q12,q14,#1
1144         veor            q15,q13                         @ sigma1(X[i+14])
1145         vshr.u64        q13,q14,#8
1146         vadd.i64        q1,q15
1147         vshr.u64        q15,q14,#7
1148         vsli.64         q12,q14,#63
1149         vsli.64         q13,q14,#56
1150         vext.8          q14,q5,q6,#8    @ X[i+9]
1151         veor            q15,q12
1152         vshr.u64        d24,d18,#14             @ from NEON_00_15
1153         vadd.i64        q1,q14
1154         vshr.u64        d25,d18,#18             @ from NEON_00_15
1155         veor            q15,q13                         @ sigma0(X[i+1])
1156         vshr.u64        d26,d18,#41             @ from NEON_00_15
1157         vadd.i64        q1,q15
1158         vld1.64         {d28},[r3,:64]! @ K[i++]
1159         vsli.64         d24,d18,#50
1160         vsli.64         d25,d18,#46
1161         vmov            d29,d18
1162         vsli.64         d26,d18,#23
1163 #if 18<16 && defined(__ARMEL__)
1164         vrev64.8        ,
1165 #endif
1166         veor            d25,d24
1167         vbsl            d29,d19,d20             @ Ch(e,f,g)
1168         vshr.u64        d24,d22,#28
1169         veor            d26,d25                 @ Sigma1(e)
1170         vadd.i64        d27,d29,d21
1171         vshr.u64        d25,d22,#34
1172         vsli.64         d24,d22,#36
1173         vadd.i64        d27,d26
1174         vshr.u64        d26,d22,#39
1175         vadd.i64        d28,d2
1176         vsli.64         d25,d22,#30
1177         veor            d30,d22,d23
1178         vsli.64         d26,d22,#25
1179         veor            d21,d24,d25
1180         vadd.i64        d27,d28
1181         vbsl            d30,d16,d23             @ Maj(a,b,c)
1182         veor            d21,d26                 @ Sigma0(a)
1183         vadd.i64        d17,d27
1184         vadd.i64        d30,d27
1185         @ vadd.i64      d21,d30
1186         vshr.u64        d24,d17,#14     @ 19
1187 #if 19<16
1188         vld1.64         {d3},[r1]!      @ handles unaligned
1189 #endif
1190         vshr.u64        d25,d17,#18
1191 #if 19>0
1192          vadd.i64       d21,d30                 @ h+=Maj from the past
1193 #endif
1194         vshr.u64        d26,d17,#41
1195         vld1.64         {d28},[r3,:64]! @ K[i++]
1196         vsli.64         d24,d17,#50
1197         vsli.64         d25,d17,#46
1198         vmov            d29,d17
1199         vsli.64         d26,d17,#23
1200 #if 19<16 && defined(__ARMEL__)
1201         vrev64.8        ,
1202 #endif
1203         veor            d25,d24
1204         vbsl            d29,d18,d19             @ Ch(e,f,g)
1205         vshr.u64        d24,d21,#28
1206         veor            d26,d25                 @ Sigma1(e)
1207         vadd.i64        d27,d29,d20
1208         vshr.u64        d25,d21,#34
1209         vsli.64         d24,d21,#36
1210         vadd.i64        d27,d26
1211         vshr.u64        d26,d21,#39
1212         vadd.i64        d28,d3
1213         vsli.64         d25,d21,#30
1214         veor            d30,d21,d22
1215         vsli.64         d26,d21,#25
1216         veor            d20,d24,d25
1217         vadd.i64        d27,d28
1218         vbsl            d30,d23,d22             @ Maj(a,b,c)
1219         veor            d20,d26                 @ Sigma0(a)
1220         vadd.i64        d16,d27
1221         vadd.i64        d30,d27
1222         @ vadd.i64      d20,d30
1223         vshr.u64        q12,q1,#19
1224         vshr.u64        q13,q1,#61
1225          vadd.i64       d20,d30                 @ h+=Maj from the past
1226         vshr.u64        q15,q1,#6
1227         vsli.64         q12,q1,#45
1228         vext.8          q14,q2,q3,#8    @ X[i+1]
1229         vsli.64         q13,q1,#3
1230         veor            q15,q12
1231         vshr.u64        q12,q14,#1
1232         veor            q15,q13                         @ sigma1(X[i+14])
1233         vshr.u64        q13,q14,#8
1234         vadd.i64        q2,q15
1235         vshr.u64        q15,q14,#7
1236         vsli.64         q12,q14,#63
1237         vsli.64         q13,q14,#56
1238         vext.8          q14,q6,q7,#8    @ X[i+9]
1239         veor            q15,q12
1240         vshr.u64        d24,d16,#14             @ from NEON_00_15
1241         vadd.i64        q2,q14
1242         vshr.u64        d25,d16,#18             @ from NEON_00_15
1243         veor            q15,q13                         @ sigma0(X[i+1])
1244         vshr.u64        d26,d16,#41             @ from NEON_00_15
1245         vadd.i64        q2,q15
1246         vld1.64         {d28},[r3,:64]! @ K[i++]
1247         vsli.64         d24,d16,#50
1248         vsli.64         d25,d16,#46
1249         vmov            d29,d16
1250         vsli.64         d26,d16,#23
1251 #if 20<16 && defined(__ARMEL__)
1252         vrev64.8        ,
1253 #endif
1254         veor            d25,d24
1255         vbsl            d29,d17,d18             @ Ch(e,f,g)
1256         vshr.u64        d24,d20,#28
1257         veor            d26,d25                 @ Sigma1(e)
1258         vadd.i64        d27,d29,d19
1259         vshr.u64        d25,d20,#34
1260         vsli.64         d24,d20,#36
1261         vadd.i64        d27,d26
1262         vshr.u64        d26,d20,#39
1263         vadd.i64        d28,d4
1264         vsli.64         d25,d20,#30
1265         veor            d30,d20,d21
1266         vsli.64         d26,d20,#25
1267         veor            d19,d24,d25
1268         vadd.i64        d27,d28
1269         vbsl            d30,d22,d21             @ Maj(a,b,c)
1270         veor            d19,d26                 @ Sigma0(a)
1271         vadd.i64        d23,d27
1272         vadd.i64        d30,d27
1273         @ vadd.i64      d19,d30
1274         vshr.u64        d24,d23,#14     @ 21
1275 #if 21<16
1276         vld1.64         {d5},[r1]!      @ handles unaligned
1277 #endif
1278         vshr.u64        d25,d23,#18
1279 #if 21>0
1280          vadd.i64       d19,d30                 @ h+=Maj from the past
1281 #endif
1282         vshr.u64        d26,d23,#41
1283         vld1.64         {d28},[r3,:64]! @ K[i++]
1284         vsli.64         d24,d23,#50
1285         vsli.64         d25,d23,#46
1286         vmov            d29,d23
1287         vsli.64         d26,d23,#23
1288 #if 21<16 && defined(__ARMEL__)
1289         vrev64.8        ,
1290 #endif
1291         veor            d25,d24
1292         vbsl            d29,d16,d17             @ Ch(e,f,g)
1293         vshr.u64        d24,d19,#28
1294         veor            d26,d25                 @ Sigma1(e)
1295         vadd.i64        d27,d29,d18
1296         vshr.u64        d25,d19,#34
1297         vsli.64         d24,d19,#36
1298         vadd.i64        d27,d26
1299         vshr.u64        d26,d19,#39
1300         vadd.i64        d28,d5
1301         vsli.64         d25,d19,#30
1302         veor            d30,d19,d20
1303         vsli.64         d26,d19,#25
1304         veor            d18,d24,d25
1305         vadd.i64        d27,d28
1306         vbsl            d30,d21,d20             @ Maj(a,b,c)
1307         veor            d18,d26                 @ Sigma0(a)
1308         vadd.i64        d22,d27
1309         vadd.i64        d30,d27
1310         @ vadd.i64      d18,d30
1311         vshr.u64        q12,q2,#19
1312         vshr.u64        q13,q2,#61
1313          vadd.i64       d18,d30                 @ h+=Maj from the past
1314         vshr.u64        q15,q2,#6
1315         vsli.64         q12,q2,#45
1316         vext.8          q14,q3,q4,#8    @ X[i+1]
1317         vsli.64         q13,q2,#3
1318         veor            q15,q12
1319         vshr.u64        q12,q14,#1
1320         veor            q15,q13                         @ sigma1(X[i+14])
1321         vshr.u64        q13,q14,#8
1322         vadd.i64        q3,q15
1323         vshr.u64        q15,q14,#7
1324         vsli.64         q12,q14,#63
1325         vsli.64         q13,q14,#56
1326         vext.8          q14,q7,q0,#8    @ X[i+9]
1327         veor            q15,q12
1328         vshr.u64        d24,d22,#14             @ from NEON_00_15
1329         vadd.i64        q3,q14
1330         vshr.u64        d25,d22,#18             @ from NEON_00_15
1331         veor            q15,q13                         @ sigma0(X[i+1])
1332         vshr.u64        d26,d22,#41             @ from NEON_00_15
1333         vadd.i64        q3,q15
1334         vld1.64         {d28},[r3,:64]! @ K[i++]
1335         vsli.64         d24,d22,#50
1336         vsli.64         d25,d22,#46
1337         vmov            d29,d22
1338         vsli.64         d26,d22,#23
1339 #if 22<16 && defined(__ARMEL__)
1340         vrev64.8        ,
1341 #endif
1342         veor            d25,d24
1343         vbsl            d29,d23,d16             @ Ch(e,f,g)
1344         vshr.u64        d24,d18,#28
1345         veor            d26,d25                 @ Sigma1(e)
1346         vadd.i64        d27,d29,d17
1347         vshr.u64        d25,d18,#34
1348         vsli.64         d24,d18,#36
1349         vadd.i64        d27,d26
1350         vshr.u64        d26,d18,#39
1351         vadd.i64        d28,d6
1352         vsli.64         d25,d18,#30
1353         veor            d30,d18,d19
1354         vsli.64         d26,d18,#25
1355         veor            d17,d24,d25
1356         vadd.i64        d27,d28
1357         vbsl            d30,d20,d19             @ Maj(a,b,c)
1358         veor            d17,d26                 @ Sigma0(a)
1359         vadd.i64        d21,d27
1360         vadd.i64        d30,d27
1361         @ vadd.i64      d17,d30
1362         vshr.u64        d24,d21,#14     @ 23
1363 #if 23<16
1364         vld1.64         {d7},[r1]!      @ handles unaligned
1365 #endif
1366         vshr.u64        d25,d21,#18
1367 #if 23>0
1368          vadd.i64       d17,d30                 @ h+=Maj from the past
1369 #endif
1370         vshr.u64        d26,d21,#41
1371         vld1.64         {d28},[r3,:64]! @ K[i++]
1372         vsli.64         d24,d21,#50
1373         vsli.64         d25,d21,#46
1374         vmov            d29,d21
1375         vsli.64         d26,d21,#23
1376 #if 23<16 && defined(__ARMEL__)
1377         vrev64.8        ,
1378 #endif
1379         veor            d25,d24
1380         vbsl            d29,d22,d23             @ Ch(e,f,g)
1381         vshr.u64        d24,d17,#28
1382         veor            d26,d25                 @ Sigma1(e)
1383         vadd.i64        d27,d29,d16
1384         vshr.u64        d25,d17,#34
1385         vsli.64         d24,d17,#36
1386         vadd.i64        d27,d26
1387         vshr.u64        d26,d17,#39
1388         vadd.i64        d28,d7
1389         vsli.64         d25,d17,#30
1390         veor            d30,d17,d18
1391         vsli.64         d26,d17,#25
1392         veor            d16,d24,d25
1393         vadd.i64        d27,d28
1394         vbsl            d30,d19,d18             @ Maj(a,b,c)
1395         veor            d16,d26                 @ Sigma0(a)
1396         vadd.i64        d20,d27
1397         vadd.i64        d30,d27
1398         @ vadd.i64      d16,d30
1399         vshr.u64        q12,q3,#19
1400         vshr.u64        q13,q3,#61
1401          vadd.i64       d16,d30                 @ h+=Maj from the past
1402         vshr.u64        q15,q3,#6
1403         vsli.64         q12,q3,#45
1404         vext.8          q14,q4,q5,#8    @ X[i+1]
1405         vsli.64         q13,q3,#3
1406         veor            q15,q12
1407         vshr.u64        q12,q14,#1
1408         veor            q15,q13                         @ sigma1(X[i+14])
1409         vshr.u64        q13,q14,#8
1410         vadd.i64        q4,q15
1411         vshr.u64        q15,q14,#7
1412         vsli.64         q12,q14,#63
1413         vsli.64         q13,q14,#56
1414         vext.8          q14,q0,q1,#8    @ X[i+9]
1415         veor            q15,q12
1416         vshr.u64        d24,d20,#14             @ from NEON_00_15
1417         vadd.i64        q4,q14
1418         vshr.u64        d25,d20,#18             @ from NEON_00_15
1419         veor            q15,q13                         @ sigma0(X[i+1])
1420         vshr.u64        d26,d20,#41             @ from NEON_00_15
1421         vadd.i64        q4,q15
1422         vld1.64         {d28},[r3,:64]! @ K[i++]
1423         vsli.64         d24,d20,#50
1424         vsli.64         d25,d20,#46
1425         vmov            d29,d20
1426         vsli.64         d26,d20,#23
1427 #if 24<16 && defined(__ARMEL__)
1428         vrev64.8        d8,d8
1429 #endif
1430         veor            d25,d24
1431         vbsl            d29,d21,d22             @ Ch(e,f,g)
1432         vshr.u64        d24,d16,#28
1433         veor            d26,d25                 @ Sigma1(e)
1434         vadd.i64        d27,d29,d23
1435         vshr.u64        d25,d16,#34
1436         vsli.64         d24,d16,#36
1437         vadd.i64        d27,d26
1438         vshr.u64        d26,d16,#39
1439         vadd.i64        d28,d8
1440         vsli.64         d25,d16,#30
1441         veor            d30,d16,d17
1442         vsli.64         d26,d16,#25
1443         veor            d23,d24,d25
1444         vadd.i64        d27,d28
1445         vbsl            d30,d18,d17             @ Maj(a,b,c)
1446         veor            d23,d26                 @ Sigma0(a)
1447         vadd.i64        d19,d27
1448         vadd.i64        d30,d27
1449         @ vadd.i64      d23,d30
1450         vshr.u64        d24,d19,#14     @ 25
1451 #if 25<16
1452         vld1.64         {d9},[r1]!      @ handles unaligned
1453 #endif
1454         vshr.u64        d25,d19,#18
1455 #if 25>0
1456          vadd.i64       d23,d30                 @ h+=Maj from the past
1457 #endif
1458         vshr.u64        d26,d19,#41
1459         vld1.64         {d28},[r3,:64]! @ K[i++]
1460         vsli.64         d24,d19,#50
1461         vsli.64         d25,d19,#46
1462         vmov            d29,d19
1463         vsli.64         d26,d19,#23
1464 #if 25<16 && defined(__ARMEL__)
1465         vrev64.8        d9,d9
1466 #endif
1467         veor            d25,d24
1468         vbsl            d29,d20,d21             @ Ch(e,f,g)
1469         vshr.u64        d24,d23,#28
1470         veor            d26,d25                 @ Sigma1(e)
1471         vadd.i64        d27,d29,d22
1472         vshr.u64        d25,d23,#34
1473         vsli.64         d24,d23,#36
1474         vadd.i64        d27,d26
1475         vshr.u64        d26,d23,#39
1476         vadd.i64        d28,d9
1477         vsli.64         d25,d23,#30
1478         veor            d30,d23,d16
1479         vsli.64         d26,d23,#25
1480         veor            d22,d24,d25
1481         vadd.i64        d27,d28
1482         vbsl            d30,d17,d16             @ Maj(a,b,c)
1483         veor            d22,d26                 @ Sigma0(a)
1484         vadd.i64        d18,d27
1485         vadd.i64        d30,d27
1486         @ vadd.i64      d22,d30
1487         vshr.u64        q12,q4,#19
1488         vshr.u64        q13,q4,#61
1489          vadd.i64       d22,d30                 @ h+=Maj from the past
1490         vshr.u64        q15,q4,#6
1491         vsli.64         q12,q4,#45
1492         vext.8          q14,q5,q6,#8    @ X[i+1]
1493         vsli.64         q13,q4,#3
1494         veor            q15,q12
1495         vshr.u64        q12,q14,#1
1496         veor            q15,q13                         @ sigma1(X[i+14])
1497         vshr.u64        q13,q14,#8
1498         vadd.i64        q5,q15
1499         vshr.u64        q15,q14,#7
1500         vsli.64         q12,q14,#63
1501         vsli.64         q13,q14,#56
1502         vext.8          q14,q1,q2,#8    @ X[i+9]
1503         veor            q15,q12
1504         vshr.u64        d24,d18,#14             @ from NEON_00_15
1505         vadd.i64        q5,q14
1506         vshr.u64        d25,d18,#18             @ from NEON_00_15
1507         veor            q15,q13                         @ sigma0(X[i+1])
1508         vshr.u64        d26,d18,#41             @ from NEON_00_15
1509         vadd.i64        q5,q15
1510         vld1.64         {d28},[r3,:64]! @ K[i++]
1511         vsli.64         d24,d18,#50
1512         vsli.64         d25,d18,#46
1513         vmov            d29,d18
1514         vsli.64         d26,d18,#23
1515 #if 26<16 && defined(__ARMEL__)
1516         vrev64.8        d10,d10
1517 #endif
1518         veor            d25,d24
1519         vbsl            d29,d19,d20             @ Ch(e,f,g)
1520         vshr.u64        d24,d22,#28
1521         veor            d26,d25                 @ Sigma1(e)
1522         vadd.i64        d27,d29,d21
1523         vshr.u64        d25,d22,#34
1524         vsli.64         d24,d22,#36
1525         vadd.i64        d27,d26
1526         vshr.u64        d26,d22,#39
1527         vadd.i64        d28,d10
1528         vsli.64         d25,d22,#30
1529         veor            d30,d22,d23
1530         vsli.64         d26,d22,#25
1531         veor            d21,d24,d25
1532         vadd.i64        d27,d28
1533         vbsl            d30,d16,d23             @ Maj(a,b,c)
1534         veor            d21,d26                 @ Sigma0(a)
1535         vadd.i64        d17,d27
1536         vadd.i64        d30,d27
1537         @ vadd.i64      d21,d30
1538         vshr.u64        d24,d17,#14     @ 27
1539 #if 27<16
1540         vld1.64         {d11},[r1]!     @ handles unaligned
1541 #endif
1542         vshr.u64        d25,d17,#18
1543 #if 27>0
1544          vadd.i64       d21,d30                 @ h+=Maj from the past
1545 #endif
1546         vshr.u64        d26,d17,#41
1547         vld1.64         {d28},[r3,:64]! @ K[i++]
1548         vsli.64         d24,d17,#50
1549         vsli.64         d25,d17,#46
1550         vmov            d29,d17
1551         vsli.64         d26,d17,#23
1552 #if 27<16 && defined(__ARMEL__)
1553         vrev64.8        d11,d11
1554 #endif
1555         veor            d25,d24
1556         vbsl            d29,d18,d19             @ Ch(e,f,g)
1557         vshr.u64        d24,d21,#28
1558         veor            d26,d25                 @ Sigma1(e)
1559         vadd.i64        d27,d29,d20
1560         vshr.u64        d25,d21,#34
1561         vsli.64         d24,d21,#36
1562         vadd.i64        d27,d26
1563         vshr.u64        d26,d21,#39
1564         vadd.i64        d28,d11
1565         vsli.64         d25,d21,#30
1566         veor            d30,d21,d22
1567         vsli.64         d26,d21,#25
1568         veor            d20,d24,d25
1569         vadd.i64        d27,d28
1570         vbsl            d30,d23,d22             @ Maj(a,b,c)
1571         veor            d20,d26                 @ Sigma0(a)
1572         vadd.i64        d16,d27
1573         vadd.i64        d30,d27
1574         @ vadd.i64      d20,d30
1575         vshr.u64        q12,q5,#19
1576         vshr.u64        q13,q5,#61
1577          vadd.i64       d20,d30                 @ h+=Maj from the past
1578         vshr.u64        q15,q5,#6
1579         vsli.64         q12,q5,#45
1580         vext.8          q14,q6,q7,#8    @ X[i+1]
1581         vsli.64         q13,q5,#3
1582         veor            q15,q12
1583         vshr.u64        q12,q14,#1
1584         veor            q15,q13                         @ sigma1(X[i+14])
1585         vshr.u64        q13,q14,#8
1586         vadd.i64        q6,q15
1587         vshr.u64        q15,q14,#7
1588         vsli.64         q12,q14,#63
1589         vsli.64         q13,q14,#56
1590         vext.8          q14,q2,q3,#8    @ X[i+9]
1591         veor            q15,q12
1592         vshr.u64        d24,d16,#14             @ from NEON_00_15
1593         vadd.i64        q6,q14
1594         vshr.u64        d25,d16,#18             @ from NEON_00_15
1595         veor            q15,q13                         @ sigma0(X[i+1])
1596         vshr.u64        d26,d16,#41             @ from NEON_00_15
1597         vadd.i64        q6,q15
1598         vld1.64         {d28},[r3,:64]! @ K[i++]
1599         vsli.64         d24,d16,#50
1600         vsli.64         d25,d16,#46
1601         vmov            d29,d16
1602         vsli.64         d26,d16,#23
1603 #if 28<16 && defined(__ARMEL__)
1604         vrev64.8        d12,d12
1605 #endif
1606         veor            d25,d24
1607         vbsl            d29,d17,d18             @ Ch(e,f,g)
1608         vshr.u64        d24,d20,#28
1609         veor            d26,d25                 @ Sigma1(e)
1610         vadd.i64        d27,d29,d19
1611         vshr.u64        d25,d20,#34
1612         vsli.64         d24,d20,#36
1613         vadd.i64        d27,d26
1614         vshr.u64        d26,d20,#39
1615         vadd.i64        d28,d12
1616         vsli.64         d25,d20,#30
1617         veor            d30,d20,d21
1618         vsli.64         d26,d20,#25
1619         veor            d19,d24,d25
1620         vadd.i64        d27,d28
1621         vbsl            d30,d22,d21             @ Maj(a,b,c)
1622         veor            d19,d26                 @ Sigma0(a)
1623         vadd.i64        d23,d27
1624         vadd.i64        d30,d27
1625         @ vadd.i64      d19,d30
1626         vshr.u64        d24,d23,#14     @ 29
1627 #if 29<16
1628         vld1.64         {d13},[r1]!     @ handles unaligned
1629 #endif
1630         vshr.u64        d25,d23,#18
1631 #if 29>0
1632          vadd.i64       d19,d30                 @ h+=Maj from the past
1633 #endif
1634         vshr.u64        d26,d23,#41
1635         vld1.64         {d28},[r3,:64]! @ K[i++]
1636         vsli.64         d24,d23,#50
1637         vsli.64         d25,d23,#46
1638         vmov            d29,d23
1639         vsli.64         d26,d23,#23
1640 #if 29<16 && defined(__ARMEL__)
1641         vrev64.8        d13,d13
1642 #endif
1643         veor            d25,d24
1644         vbsl            d29,d16,d17             @ Ch(e,f,g)
1645         vshr.u64        d24,d19,#28
1646         veor            d26,d25                 @ Sigma1(e)
1647         vadd.i64        d27,d29,d18
1648         vshr.u64        d25,d19,#34
1649         vsli.64         d24,d19,#36
1650         vadd.i64        d27,d26
1651         vshr.u64        d26,d19,#39
1652         vadd.i64        d28,d13
1653         vsli.64         d25,d19,#30
1654         veor            d30,d19,d20
1655         vsli.64         d26,d19,#25
1656         veor            d18,d24,d25
1657         vadd.i64        d27,d28
1658         vbsl            d30,d21,d20             @ Maj(a,b,c)
1659         veor            d18,d26                 @ Sigma0(a)
1660         vadd.i64        d22,d27
1661         vadd.i64        d30,d27
1662         @ vadd.i64      d18,d30
1663         vshr.u64        q12,q6,#19
1664         vshr.u64        q13,q6,#61
1665          vadd.i64       d18,d30                 @ h+=Maj from the past
1666         vshr.u64        q15,q6,#6
1667         vsli.64         q12,q6,#45
1668         vext.8          q14,q7,q0,#8    @ X[i+1]
1669         vsli.64         q13,q6,#3
1670         veor            q15,q12
1671         vshr.u64        q12,q14,#1
1672         veor            q15,q13                         @ sigma1(X[i+14])
1673         vshr.u64        q13,q14,#8
1674         vadd.i64        q7,q15
1675         vshr.u64        q15,q14,#7
1676         vsli.64         q12,q14,#63
1677         vsli.64         q13,q14,#56
1678         vext.8          q14,q3,q4,#8    @ X[i+9]
1679         veor            q15,q12
1680         vshr.u64        d24,d22,#14             @ from NEON_00_15
1681         vadd.i64        q7,q14
1682         vshr.u64        d25,d22,#18             @ from NEON_00_15
1683         veor            q15,q13                         @ sigma0(X[i+1])
1684         vshr.u64        d26,d22,#41             @ from NEON_00_15
1685         vadd.i64        q7,q15
1686         vld1.64         {d28},[r3,:64]! @ K[i++]
1687         vsli.64         d24,d22,#50
1688         vsli.64         d25,d22,#46
1689         vmov            d29,d22
1690         vsli.64         d26,d22,#23
1691 #if 30<16 && defined(__ARMEL__)
1692         vrev64.8        d14,d14
1693 #endif
1694         veor            d25,d24
1695         vbsl            d29,d23,d16             @ Ch(e,f,g)
1696         vshr.u64        d24,d18,#28
1697         veor            d26,d25                 @ Sigma1(e)
1698         vadd.i64        d27,d29,d17
1699         vshr.u64        d25,d18,#34
1700         vsli.64         d24,d18,#36
1701         vadd.i64        d27,d26
1702         vshr.u64        d26,d18,#39
1703         vadd.i64        d28,d14
1704         vsli.64         d25,d18,#30
1705         veor            d30,d18,d19
1706         vsli.64         d26,d18,#25
1707         veor            d17,d24,d25
1708         vadd.i64        d27,d28
1709         vbsl            d30,d20,d19             @ Maj(a,b,c)
1710         veor            d17,d26                 @ Sigma0(a)
1711         vadd.i64        d21,d27
1712         vadd.i64        d30,d27
1713         @ vadd.i64      d17,d30
1714         vshr.u64        d24,d21,#14     @ 31
1715 #if 31<16
1716         vld1.64         {d15},[r1]!     @ handles unaligned
1717 #endif
1718         vshr.u64        d25,d21,#18
1719 #if 31>0
1720          vadd.i64       d17,d30                 @ h+=Maj from the past
1721 #endif
1722         vshr.u64        d26,d21,#41
1723         vld1.64         {d28},[r3,:64]! @ K[i++]
1724         vsli.64         d24,d21,#50
1725         vsli.64         d25,d21,#46
1726         vmov            d29,d21
1727         vsli.64         d26,d21,#23
1728 #if 31<16 && defined(__ARMEL__)
1729         vrev64.8        d15,d15
1730 #endif
1731         veor            d25,d24
1732         vbsl            d29,d22,d23             @ Ch(e,f,g)
1733         vshr.u64        d24,d17,#28
1734         veor            d26,d25                 @ Sigma1(e)
1735         vadd.i64        d27,d29,d16
1736         vshr.u64        d25,d17,#34
1737         vsli.64         d24,d17,#36
1738         vadd.i64        d27,d26
1739         vshr.u64        d26,d17,#39
1740         vadd.i64        d28,d15
1741         vsli.64         d25,d17,#30
1742         veor            d30,d17,d18
1743         vsli.64         d26,d17,#25
1744         veor            d16,d24,d25
1745         vadd.i64        d27,d28
1746         vbsl            d30,d19,d18             @ Maj(a,b,c)
1747         veor            d16,d26                 @ Sigma0(a)
1748         vadd.i64        d20,d27
1749         vadd.i64        d30,d27
1750         @ vadd.i64      d16,d30
1751         bne             .L16_79_neon
1752
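        @ All 80 rounds are done for this block: fold in the last deferred Maj,
        @ reload the hash state from [r0] into d24-d31, add the working variables
        @ d16-d23 to it, and store the result back.  r3 is rewound by 640 bytes
        @ (80 K512 constants of 8 bytes each) and .Loop_neon repeats while the
        @ input pointer r1 has not yet reached the end mark in r2.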
1753          vadd.i64       d16,d30         @ h+=Maj from the past
1754         vldmia          r0,{d24-d31}    @ load context to temp
1755         vadd.i64        q8,q12          @ vectorized accumulate
1756         vadd.i64        q9,q13
1757         vadd.i64        q10,q14
1758         vadd.i64        q11,q15
1759         vstmia          r0,{d16-d23}    @ save context
1760         teq             r1,r2
1761         sub             r3,#640 @ rewind K512
1762         bne             .Loop_neon
1763
1764         vldmia  sp!,{d8-d15}            @ epilogue
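        @ 0xe12fff1e is the ARM encoding of "bx lr", emitted as a literal word so
        @ the source still assembles when targeting cores that predate the bx
        @ mnemonic while returning cleanly to Thumb callers.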
1765         .word   0xe12fff1e
1766 #endif
1767 .size   sha512_block_data_order,.-sha512_block_data_order
1768 .asciz  "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
1769 .align  2
1770 .comm   OPENSSL_armcap_P,4,4
1771
1772 #endif