Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / third_party / boringssl / win-x86_64 / crypto / bn / rsaz-x86_64.asm
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7
8 EXTERN  OPENSSL_ia32cap_P
9
;-----------------------------------------------------------------------
; rsaz_512_sqr -- repeated 512-bit squaring, each followed by a call to
; __rsaz_512_reduce and __rsaz_512_subtract.
; ABI:   Microsoft x64. The prologue copies rcx, rdx, r8, r9 and the
;        fifth (stack) argument into rdi, rsi, rdx, rcx, r8.
; In:    arg1 -> rdi        destination, 8 qwords
;        arg2 -> rsi        source operand, 8 qwords
;        arg3 -> rbp        8-qword value consumed by the reduce/subtract
;                           helpers (presumably the modulus -- confirm)
;        arg4 -> [128+rsp]  multiplier read by __rsaz_512_reduce
;                           (presumably the Montgomery constant -- confirm)
;        arg5 -> r8d        pass counter, parked at [128+8+rsp]
; Stack: [rsp..rsp+127] receives the 16-qword square of the operand.
; After every pass the destination buffer becomes the next source.
; NOTE(review): the carry out of the "adc r9,0" that follows the first
; "mul rax" is never consumed; this looks like the overflow described in
; CVE-2019-1551 -- confirm against the upstream fix before relying on it.
;-----------------------------------------------------------------------
10 global  rsaz_512_sqr
11
12 ALIGN   32
13 rsaz_512_sqr:
14         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
15         mov     QWORD[16+rsp],rsi
16         mov     rax,rsp
17 $L$SEH_begin_rsaz_512_sqr:
18         mov     rdi,rcx
19         mov     rsi,rdx
20         mov     rdx,r8
21         mov     rcx,r9
22         mov     r8,QWORD[40+rsp]
23
24
25         push    rbx
26         push    rbp
27         push    r12
28         push    r13
29         push    r14
30         push    r15
31
32         sub     rsp,128+24
33 $L$sqr_body:
; rbp = arg3, rdx = a[0], rax = a[1]; arg4 is parked for the reduce helper.
34         mov     rbp,rdx
35         mov     rdx,QWORD[rsi]
36         mov     rax,QWORD[8+rsi]
37         mov     QWORD[128+rsp],rcx
38         jmp     NEAR $L$oop_sqr
39
40 ALIGN   32
41 $L$oop_sqr:
; Each pass starts with rdx = a[0], rax = a[1] and r8d = passes remaining.
42         mov     DWORD[((128+8))+rsp],r8d
43
; Row 0: a[0] * a[1..7] accumulated into r8..r15 (rbx = a[0]).
44         mov     rbx,rdx
45         mul     rdx
46         mov     r8,rax
47         mov     rax,QWORD[16+rsi]
48         mov     r9,rdx
49
50         mul     rbx
51         add     r9,rax
52         mov     rax,QWORD[24+rsi]
53         mov     r10,rdx
54         adc     r10,0
55
56         mul     rbx
57         add     r10,rax
58         mov     rax,QWORD[32+rsi]
59         mov     r11,rdx
60         adc     r11,0
61
62         mul     rbx
63         add     r11,rax
64         mov     rax,QWORD[40+rsi]
65         mov     r12,rdx
66         adc     r12,0
67
68         mul     rbx
69         add     r12,rax
70         mov     rax,QWORD[48+rsi]
71         mov     r13,rdx
72         adc     r13,0
73
74         mul     rbx
75         add     r13,rax
76         mov     rax,QWORD[56+rsi]
77         mov     r14,rdx
78         adc     r14,0
79
80         mul     rbx
81         add     r14,rax
82         mov     rax,rbx
83         mov     r15,rdx
84         adc     r15,0
85
; Double the two lowest cross-product limbs; rcx keeps a copy of the
; undoubled r9 so its top bit can be carried into the next limb.
86         add     r8,r8
87         mov     rcx,r9
88         adc     r9,r9
89
; Add a[0]^2: low half is result limb 0, high half folds into r8/r9.
90         mul     rax
91         mov     QWORD[rsp],rax
92         add     r8,rdx
; NOTE(review): the carry out of the next adc is overwritten by the
; following shr before anything reads it -- see the header note.
93         adc     r9,0
94
95         mov     QWORD[8+rsp],r8
96         shr     rcx,63
97
98
; Row 1: a[1] * a[2..7] added into r10..r15; r8 becomes the new top limb.
99         mov     r8,QWORD[8+rsi]
100         mov     rax,QWORD[16+rsi]
101         mul     r8
102         add     r10,rax
103         mov     rax,QWORD[24+rsi]
104         mov     rbx,rdx
105         adc     rbx,0
106
107         mul     r8
108         add     r11,rax
109         mov     rax,QWORD[32+rsi]
110         adc     rdx,0
111         add     r11,rbx
112         mov     rbx,rdx
113         adc     rbx,0
114
115         mul     r8
116         add     r12,rax
117         mov     rax,QWORD[40+rsi]
118         adc     rdx,0
119         add     r12,rbx
120         mov     rbx,rdx
121         adc     rbx,0
122
123         mul     r8
124         add     r13,rax
125         mov     rax,QWORD[48+rsi]
126         adc     rdx,0
127         add     r13,rbx
128         mov     rbx,rdx
129         adc     rbx,0
130
131         mul     r8
132         add     r14,rax
133         mov     rax,QWORD[56+rsi]
134         adc     rdx,0
135         add     r14,rbx
136         mov     rbx,rdx
137         adc     rbx,0
138
139         mul     r8
140         add     r15,rax
141         mov     rax,r8
142         adc     rdx,0
143         add     r15,rbx
144         mov     r8,rdx
145         mov     rdx,r10
146         adc     r8,0
147
; Double r10/r11 (rcx supplies the bit shifted out of r9), then add a[1]^2.
; "add rdx,rdx" is only used to put the top bit of r10 into CF for the adc.
148         add     rdx,rdx
149         lea     r10,[r10*2+rcx]
150         mov     rbx,r11
151         adc     r11,r11
152
153         mul     rax
154         add     r9,rax
155         adc     r10,rdx
156         adc     r11,0
157
158         mov     QWORD[16+rsp],r9
159         mov     QWORD[24+rsp],r10
160         shr     rbx,63
161
162
; Row 2: a[2] * a[3..7] added into r12..r15, r8; r9 becomes the new top limb.
163         mov     r9,QWORD[16+rsi]
164         mov     rax,QWORD[24+rsi]
165         mul     r9
166         add     r12,rax
167         mov     rax,QWORD[32+rsi]
168         mov     rcx,rdx
169         adc     rcx,0
170
171         mul     r9
172         add     r13,rax
173         mov     rax,QWORD[40+rsi]
174         adc     rdx,0
175         add     r13,rcx
176         mov     rcx,rdx
177         adc     rcx,0
178
179         mul     r9
180         add     r14,rax
181         mov     rax,QWORD[48+rsi]
182         adc     rdx,0
183         add     r14,rcx
184         mov     rcx,rdx
185         adc     rcx,0
186
187         mul     r9
; lea is used for the doubling because it leaves the flags untouched.
188         mov     r10,r12
189         lea     r12,[r12*2+rbx]
190         add     r15,rax
191         mov     rax,QWORD[56+rsi]
192         adc     rdx,0
193         add     r15,rcx
194         mov     rcx,rdx
195         adc     rcx,0
196
197         mul     r9
198         shr     r10,63
199         add     r8,rax
200         mov     rax,r9
201         adc     rdx,0
202         add     r8,rcx
203         mov     r9,rdx
204         adc     r9,0
205
206         mov     rcx,r13
207         lea     r13,[r13*2+r10]
208
; Add a[2]^2 and retire result limbs 4 and 5.
209         mul     rax
210         add     r11,rax
211         adc     r12,rdx
212         adc     r13,0
213
214         mov     QWORD[32+rsp],r11
215         mov     QWORD[40+rsp],r12
216         shr     rcx,63
217
218
; Row 3: a[3] * a[4..7] added into r14, r15, r8, r9; r10 becomes the top limb.
219         mov     r10,QWORD[24+rsi]
220         mov     rax,QWORD[32+rsi]
221         mul     r10
222         add     r14,rax
223         mov     rax,QWORD[40+rsi]
224         mov     rbx,rdx
225         adc     rbx,0
226
227         mul     r10
228         add     r15,rax
229         mov     rax,QWORD[48+rsi]
230         adc     rdx,0
231         add     r15,rbx
232         mov     rbx,rdx
233         adc     rbx,0
234
235         mul     r10
236         mov     r12,r14
237         lea     r14,[r14*2+rcx]
238         add     r8,rax
239         mov     rax,QWORD[56+rsi]
240         adc     rdx,0
241         add     r8,rbx
242         mov     rbx,rdx
243         adc     rbx,0
244
245         mul     r10
246         shr     r12,63
247         add     r9,rax
248         mov     rax,r10
249         adc     rdx,0
250         add     r9,rbx
251         mov     r10,rdx
252         adc     r10,0
253
254         mov     rbx,r15
255         lea     r15,[r15*2+r12]
256
; Add a[3]^2 and retire result limbs 6 and 7.
257         mul     rax
258         add     r13,rax
259         adc     r14,rdx
260         adc     r15,0
261
262         mov     QWORD[48+rsp],r13
263         mov     QWORD[56+rsp],r14
264         shr     rbx,63
265
266
; Row 4: a[4] * a[5..7] added into r8, r9, r10; r11 becomes the top limb.
267         mov     r11,QWORD[32+rsi]
268         mov     rax,QWORD[40+rsi]
269         mul     r11
270         add     r8,rax
271         mov     rax,QWORD[48+rsi]
272         mov     rcx,rdx
273         adc     rcx,0
274
275         mul     r11
276         add     r9,rax
277         mov     rax,QWORD[56+rsi]
278         adc     rdx,0
279         mov     r12,r8
280         lea     r8,[r8*2+rbx]
281         add     r9,rcx
282         mov     rcx,rdx
283         adc     rcx,0
284
285         mul     r11
286         shr     r12,63
287         add     r10,rax
288         mov     rax,r11
289         adc     rdx,0
290         add     r10,rcx
291         mov     r11,rdx
292         adc     r11,0
293
294         mov     rcx,r9
295         lea     r9,[r9*2+r12]
296
; Add a[4]^2 and retire result limbs 8 and 9.
297         mul     rax
298         add     r15,rax
299         adc     r8,rdx
300         adc     r9,0
301
302         mov     QWORD[64+rsp],r15
303         mov     QWORD[72+rsp],r8
304         shr     rcx,63
305
306
; Row 5: a[5] * a[6..7] added into r10, r11; r12 becomes the top limb.
307         mov     r12,QWORD[40+rsi]
308         mov     rax,QWORD[48+rsi]
309         mul     r12
310         add     r10,rax
311         mov     rax,QWORD[56+rsi]
312         mov     rbx,rdx
313         adc     rbx,0
314
315         mul     r12
316         add     r11,rax
317         mov     rax,r12
318         mov     r15,r10
319         lea     r10,[r10*2+rcx]
320         adc     rdx,0
321         shr     r15,63
322         add     r11,rbx
323         mov     r12,rdx
324         adc     r12,0
325
326         mov     rbx,r11
327         lea     r11,[r11*2+r15]
328
; Add a[5]^2 and retire result limbs 10 and 11.
329         mul     rax
330         add     r9,rax
331         adc     r10,rdx
332         adc     r11,0
333
334         mov     QWORD[80+rsp],r9
335         mov     QWORD[88+rsp],r10
336
337
; Row 6: a[6] * a[7] added into r12; r13 becomes the top limb.
338         mov     r13,QWORD[48+rsi]
339         mov     rax,QWORD[56+rsi]
340         mul     r13
341         add     r12,rax
342         mov     rax,r13
343         mov     r13,rdx
344         adc     r13,0
345
; Double r12:r13 into r12:r13:r14; "shl rbx,1" moves the top bit of the
; undoubled r11 (saved in rbx) into CF to seed the adc chain.
346         xor     r14,r14
347         shl     rbx,1
348         adc     r12,r12
349         adc     r13,r13
350         adc     r14,r14
351
; Add a[6]^2 and retire result limbs 12 and 13.
352         mul     rax
353         add     r11,rax
354         adc     r12,rdx
355         adc     r13,0
356
357         mov     QWORD[96+rsp],r11
358         mov     QWORD[104+rsp],r12
359
360
; Add a[7]^2 and retire the last two result limbs.
361         mov     rax,QWORD[56+rsi]
362         mul     rax
363         add     r13,rax
364         adc     rdx,0
365
366         add     r14,rdx
367
368         mov     QWORD[112+rsp],r13
369         mov     QWORD[120+rsp],r14
370
; Reload the low eight limbs of the square for the reduction helper.
371         mov     r8,QWORD[rsp]
372         mov     r9,QWORD[8+rsp]
373         mov     r10,QWORD[16+rsp]
374         mov     r11,QWORD[24+rsp]
375         mov     r12,QWORD[32+rsp]
376         mov     r13,QWORD[40+rsp]
377         mov     r14,QWORD[48+rsp]
378         mov     r15,QWORD[56+rsp]
379
380         call    __rsaz_512_reduce
381
; Add the high eight limbs; rcx becomes 0 or all-ones from the final carry
; and is the mask handed to __rsaz_512_subtract.
382         add     r8,QWORD[64+rsp]
383         adc     r9,QWORD[72+rsp]
384         adc     r10,QWORD[80+rsp]
385         adc     r11,QWORD[88+rsp]
386         adc     r12,QWORD[96+rsp]
387         adc     r13,QWORD[104+rsp]
388         adc     r14,QWORD[112+rsp]
389         adc     r15,QWORD[120+rsp]
390         sbb     rcx,rcx
391
392         call    __rsaz_512_subtract
393
; Set up the next pass: rdx/rax = result limbs 0/1, source = destination.
394         mov     rdx,r8
395         mov     rax,r9
396         mov     r8d,DWORD[((128+8))+rsp]
397         mov     rsi,rdi
398
399         dec     r8d
400         jnz     NEAR $L$oop_sqr
401
; rax = stack pointer value at function entry; the six pushed registers
; sit immediately below it.
402         lea     rax,[((128+24+48))+rsp]
403         mov     r15,QWORD[((-48))+rax]
404         mov     r14,QWORD[((-40))+rax]
405         mov     r13,QWORD[((-32))+rax]
406         mov     r12,QWORD[((-24))+rax]
407         mov     rbp,QWORD[((-16))+rax]
408         mov     rbx,QWORD[((-8))+rax]
409         lea     rsp,[rax]
410 $L$sqr_epilogue:
411         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
412         mov     rsi,QWORD[16+rsp]
413         DB      0F3h,0C3h               ;repret
414 $L$SEH_end_rsaz_512_sqr:
;-----------------------------------------------------------------------
; rsaz_512_mul -- 512x512-bit multiply followed by a call to
; __rsaz_512_reduce and __rsaz_512_subtract.
; ABI:   Microsoft x64. The prologue copies rcx, rdx, r8, r9 and the
;        fifth (stack) argument into rdi, rsi, rdx, rcx, r8.
; In:    arg1 -> rdi        destination, 8 qwords
;        arg2 -> rsi        first operand, 8 qwords
;        arg3 -> rdx        second operand, 8 qwords
;        arg4 -> rcx        8-qword value later placed in rbp for the
;                           reduce/subtract helpers (presumably the
;                           modulus -- confirm against caller)
;        arg5 -> [128+rsp]  multiplier read by __rsaz_512_reduce
; Stack: [rsp..rsp+127] receives the 16-qword product.
; xmm0 and xmm1 are used as scratch to carry rdi and rcx across the
; call to __rsaz_512_mul, which overwrites rdi and advances rbp.
;-----------------------------------------------------------------------
415 global  rsaz_512_mul
416
417 ALIGN   32
418 rsaz_512_mul:
419         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
420         mov     QWORD[16+rsp],rsi
421         mov     rax,rsp
422 $L$SEH_begin_rsaz_512_mul:
423         mov     rdi,rcx
424         mov     rsi,rdx
425         mov     rdx,r8
426         mov     rcx,r9
427         mov     r8,QWORD[40+rsp]
428
429
430         push    rbx
431         push    rbp
432         push    r12
433         push    r13
434         push    r14
435         push    r15
436
437         sub     rsp,128+24
438 $L$mul_body:
439 DB      102,72,15,110,199               ; movq xmm0,rdi
440 DB      102,72,15,110,201               ; movq xmm1,rcx
441         mov     QWORD[128+rsp],r8
; rbx = first limb of the second operand, rbp = pointer to that operand.
442         mov     rbx,QWORD[rdx]
443         mov     rbp,rdx
444         call    __rsaz_512_mul
445
446 DB      102,72,15,126,199               ; movq rdi,xmm0
447 DB      102,72,15,126,205               ; movq rbp,xmm1
448
; Low eight limbs of the product go to the reduction helper in r8..r15.
449         mov     r8,QWORD[rsp]
450         mov     r9,QWORD[8+rsp]
451         mov     r10,QWORD[16+rsp]
452         mov     r11,QWORD[24+rsp]
453         mov     r12,QWORD[32+rsp]
454         mov     r13,QWORD[40+rsp]
455         mov     r14,QWORD[48+rsp]
456         mov     r15,QWORD[56+rsp]
457
458         call    __rsaz_512_reduce
; Add the high eight limbs; rcx = 0 or all-ones from the final carry.
459         add     r8,QWORD[64+rsp]
460         adc     r9,QWORD[72+rsp]
461         adc     r10,QWORD[80+rsp]
462         adc     r11,QWORD[88+rsp]
463         adc     r12,QWORD[96+rsp]
464         adc     r13,QWORD[104+rsp]
465         adc     r14,QWORD[112+rsp]
466         adc     r15,QWORD[120+rsp]
467         sbb     rcx,rcx
468
469         call    __rsaz_512_subtract
470
; rax = stack pointer value at function entry; restore pushed registers.
471         lea     rax,[((128+24+48))+rsp]
472         mov     r15,QWORD[((-48))+rax]
473         mov     r14,QWORD[((-40))+rax]
474         mov     r13,QWORD[((-32))+rax]
475         mov     r12,QWORD[((-24))+rax]
476         mov     rbp,QWORD[((-16))+rax]
477         mov     rbx,QWORD[((-8))+rax]
478         lea     rsp,[rax]
479 $L$mul_epilogue:
480         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
481         mov     rsi,QWORD[16+rsp]
482         DB      0F3h,0C3h               ;repret
483 $L$SEH_end_rsaz_512_mul:
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- multiply an 8-qword operand by a value that is
; gathered limb-by-limb from an interleaved table, then call
; __rsaz_512_reduce and __rsaz_512_subtract.
; ABI:   Microsoft x64. The prologue copies rcx, rdx, r8, r9 and the
;        fifth/sixth (stack) arguments into rdi, rsi, rdx, rcx, r8, r9.
; In:    arg1 -> rdi        destination, 8 qwords
;        arg2 -> rsi        first operand, 8 qwords
;        arg3 -> rdx        table base
;        arg4 -> rcx        8-qword value later placed in rbp for the
;                           reduce/subtract helpers (presumably the
;                           modulus -- confirm against caller)
;        arg5 -> [128+rsp]  multiplier read by __rsaz_512_reduce
;        arg6 -> r9d        table column index (zero-extended below)
; Table layout as read here: limb i of column p has its low dword at
; [table + 4*p + 128*i] and its high dword 64 bytes after that.
; Stack: [rsp..rsp+127] receives the 16-qword product.
; NOTE(review): the load addresses depend on the index argument; if that
; index is secret this is a potential cache-timing concern -- confirm
; against the caller and upstream.
;-----------------------------------------------------------------------
484 global  rsaz_512_mul_gather4
485
486 ALIGN   32
487 rsaz_512_mul_gather4:
488         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
489         mov     QWORD[16+rsp],rsi
490         mov     rax,rsp
491 $L$SEH_begin_rsaz_512_mul_gather4:
492         mov     rdi,rcx
493         mov     rsi,rdx
494         mov     rdx,r8
495         mov     rcx,r9
496         mov     r8,QWORD[40+rsp]
497         mov     r9,QWORD[48+rsp]
498
499
500         push    rbx
501         push    rbp
502         push    r12
503         push    r13
504         push    r14
505         push    r15
506
507         mov     r9d,r9d                 ; zero-extend the 32-bit index
508         sub     rsp,128+24
509 $L$mul_gather4_body:
; Gather limb 0 of the table entry into rbx (high dword in eax, low in ebx).
510         mov     eax,DWORD[64+r9*4+rdx]
511 DB      102,72,15,110,199               ; movq xmm0,rdi
512         mov     ebx,DWORD[r9*4+rdx]
513 DB      102,72,15,110,201               ; movq xmm1,rcx
514         mov     QWORD[128+rsp],r8
515
516         shl     rax,32
517         or      rbx,rax
518         mov     rax,QWORD[rsi]
519         mov     rcx,QWORD[8+rsi]
; rbp -> limb 1 of the table entry.
520         lea     rbp,[128+r9*4+rdx]
; First row: operand * limb 0 -> [rsp], r8..r15. The next table limb is
; assembled in xmm4 (movd/movd/pslldq/por) between the multiplies.
521         mul     rbx
522         mov     QWORD[rsp],rax
523         mov     rax,rcx
524         mov     r8,rdx
525
526         mul     rbx
527         movd    xmm4,DWORD[rbp]
528         add     r8,rax
529         mov     rax,QWORD[16+rsi]
530         mov     r9,rdx
531         adc     r9,0
532
533         mul     rbx
534         movd    xmm5,DWORD[64+rbp]
535         add     r9,rax
536         mov     rax,QWORD[24+rsi]
537         mov     r10,rdx
538         adc     r10,0
539
540         mul     rbx
541         pslldq  xmm5,4
542         add     r10,rax
543         mov     rax,QWORD[32+rsi]
544         mov     r11,rdx
545         adc     r11,0
546
547         mul     rbx
548         por     xmm4,xmm5
549         add     r11,rax
550         mov     rax,QWORD[40+rsi]
551         mov     r12,rdx
552         adc     r12,0
553
554         mul     rbx
555         add     r12,rax
556         mov     rax,QWORD[48+rsi]
557         mov     r13,rdx
558         adc     r13,0
559
560         mul     rbx
561         lea     rbp,[128+rbp]
562         add     r13,rax
563         mov     rax,QWORD[56+rsi]
564         mov     r14,rdx
565         adc     r14,0
566
567         mul     rbx
568 DB      102,72,15,126,227               ; movq rbx,xmm4 (next table limb)
569         add     r14,rax
570         mov     rax,QWORD[rsi]
571         mov     r15,rdx
572         adc     r15,0
573
; rdi walks the product buffer; seven more rows remain.
574         lea     rdi,[8+rsp]
575         mov     ecx,7
576         jmp     NEAR $L$oop_mul_gather
577
578 ALIGN   32
579 $L$oop_mul_gather:
; One row per pass: add operand * rbx into r8..r15, store the finished low
; limb at [rdi], and gather the following table limb for the next pass.
; NOTE(review): the last pass still performs the gather loads for a limb
; past the eighth; the loaded value is not used after the loop.
580         mul     rbx
581         add     r8,rax
582         mov     rax,QWORD[8+rsi]
583         mov     QWORD[rdi],r8
584         mov     r8,rdx
585         adc     r8,0
586
587         mul     rbx
588         movd    xmm4,DWORD[rbp]
589         add     r9,rax
590         mov     rax,QWORD[16+rsi]
591         adc     rdx,0
592         add     r8,r9
593         mov     r9,rdx
594         adc     r9,0
595
596         mul     rbx
597         movd    xmm5,DWORD[64+rbp]
598         add     r10,rax
599         mov     rax,QWORD[24+rsi]
600         adc     rdx,0
601         add     r9,r10
602         mov     r10,rdx
603         adc     r10,0
604
605         mul     rbx
606         pslldq  xmm5,4
607         add     r11,rax
608         mov     rax,QWORD[32+rsi]
609         adc     rdx,0
610         add     r10,r11
611         mov     r11,rdx
612         adc     r11,0
613
614         mul     rbx
615         por     xmm4,xmm5
616         add     r12,rax
617         mov     rax,QWORD[40+rsi]
618         adc     rdx,0
619         add     r11,r12
620         mov     r12,rdx
621         adc     r12,0
622
623         mul     rbx
624         add     r13,rax
625         mov     rax,QWORD[48+rsi]
626         adc     rdx,0
627         add     r12,r13
628         mov     r13,rdx
629         adc     r13,0
630
631         mul     rbx
632         add     r14,rax
633         mov     rax,QWORD[56+rsi]
634         adc     rdx,0
635         add     r13,r14
636         mov     r14,rdx
637         adc     r14,0
638
639         mul     rbx
640 DB      102,72,15,126,227               ; movq rbx,xmm4 (next table limb)
641         add     r15,rax
642         mov     rax,QWORD[rsi]
643         adc     rdx,0
644         add     r14,r15
645         mov     r15,rdx
646         adc     r15,0
647
648         lea     rbp,[128+rbp]
649         lea     rdi,[8+rdi]
650
651         dec     ecx
652         jnz     NEAR $L$oop_mul_gather
653
; Store the remaining eight high limbs of the product.
654         mov     QWORD[rdi],r8
655         mov     QWORD[8+rdi],r9
656         mov     QWORD[16+rdi],r10
657         mov     QWORD[24+rdi],r11
658         mov     QWORD[32+rdi],r12
659         mov     QWORD[40+rdi],r13
660         mov     QWORD[48+rdi],r14
661         mov     QWORD[56+rdi],r15
662
663 DB      102,72,15,126,199               ; movq rdi,xmm0
664 DB      102,72,15,126,205               ; movq rbp,xmm1
665
; Low eight limbs of the product go to the reduction helper in r8..r15.
666         mov     r8,QWORD[rsp]
667         mov     r9,QWORD[8+rsp]
668         mov     r10,QWORD[16+rsp]
669         mov     r11,QWORD[24+rsp]
670         mov     r12,QWORD[32+rsp]
671         mov     r13,QWORD[40+rsp]
672         mov     r14,QWORD[48+rsp]
673         mov     r15,QWORD[56+rsp]
674
675         call    __rsaz_512_reduce
; Add the high eight limbs; rcx = 0 or all-ones from the final carry.
676         add     r8,QWORD[64+rsp]
677         adc     r9,QWORD[72+rsp]
678         adc     r10,QWORD[80+rsp]
679         adc     r11,QWORD[88+rsp]
680         adc     r12,QWORD[96+rsp]
681         adc     r13,QWORD[104+rsp]
682         adc     r14,QWORD[112+rsp]
683         adc     r15,QWORD[120+rsp]
684         sbb     rcx,rcx
685
686         call    __rsaz_512_subtract
687
; rax = stack pointer value at function entry; restore pushed registers.
688         lea     rax,[((128+24+48))+rsp]
689         mov     r15,QWORD[((-48))+rax]
690         mov     r14,QWORD[((-40))+rax]
691         mov     r13,QWORD[((-32))+rax]
692         mov     r12,QWORD[((-24))+rax]
693         mov     rbp,QWORD[((-16))+rax]
694         mov     rbx,QWORD[((-8))+rax]
695         lea     rsp,[rax]
696 $L$mul_gather4_epilogue:
697         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
698         mov     rsi,QWORD[16+rsp]
699         DB      0F3h,0C3h               ;repret
700 $L$SEH_end_rsaz_512_mul_gather4:
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply the destination buffer by a second
; operand, call __rsaz_512_reduce and __rsaz_512_subtract, and write the
; result both to the destination and into one column of an interleaved
; table.
; ABI:   Microsoft x64. The prologue copies rcx, rdx, r8, r9 and the
;        fifth/sixth (stack) arguments into rdi, rsi, rdx, rcx, r8, r9.
; In:    arg1 -> rdi        destination; also read as one multiplicand
;        arg2 -> rsi        other multiplicand, 8 qwords
;        arg3 -> rdx        8-qword value later placed in rbp for the
;                           reduce/subtract helpers (presumably the
;                           modulus -- confirm against caller)
;        arg4 -> [128+rsp]  multiplier read by __rsaz_512_reduce
;        arg5 -> r8         table base
;        arg6 -> r9d        table column index (zero-extended below)
; Table layout as written here: limb i has its low dword at
; [table + 4*index + 128*i] and its high dword 64 bytes after that.
;-----------------------------------------------------------------------
701 global  rsaz_512_mul_scatter4
702
703 ALIGN   32
704 rsaz_512_mul_scatter4:
705         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
706         mov     QWORD[16+rsp],rsi
707         mov     rax,rsp
708 $L$SEH_begin_rsaz_512_mul_scatter4:
709         mov     rdi,rcx
710         mov     rsi,rdx
711         mov     rdx,r8
712         mov     rcx,r9
713         mov     r8,QWORD[40+rsp]
714         mov     r9,QWORD[48+rsp]
715
716
717         push    rbx
718         push    rbp
719         push    r12
720         push    r13
721         push    r14
722         push    r15
723
724         mov     r9d,r9d                 ; zero-extend the 32-bit index
725         sub     rsp,128+24
726 $L$mul_scatter4_body:
; r8 -> first dword of the selected table column.
727         lea     r8,[r9*4+r8]
728 DB      102,72,15,110,199               ; movq xmm0,rdi
729 DB      102,72,15,110,202               ; movq xmm1,rdx
730 DB      102,73,15,110,208               ; movq xmm2,r8
731         mov     QWORD[128+rsp],rcx
732
; The destination buffer supplies the multiplier limbs (rbp/rbx).
733         mov     rbp,rdi
734         mov     rbx,QWORD[rdi]
735         call    __rsaz_512_mul
736
737 DB      102,72,15,126,199               ; movq rdi,xmm0
738 DB      102,72,15,126,205               ; movq rbp,xmm1
739
; Low eight limbs of the product go to the reduction helper in r8..r15.
740         mov     r8,QWORD[rsp]
741         mov     r9,QWORD[8+rsp]
742         mov     r10,QWORD[16+rsp]
743         mov     r11,QWORD[24+rsp]
744         mov     r12,QWORD[32+rsp]
745         mov     r13,QWORD[40+rsp]
746         mov     r14,QWORD[48+rsp]
747         mov     r15,QWORD[56+rsp]
748
749         call    __rsaz_512_reduce
; Add the high eight limbs; rcx = 0 or all-ones from the final carry.
750         add     r8,QWORD[64+rsp]
751         adc     r9,QWORD[72+rsp]
752         adc     r10,QWORD[80+rsp]
753         adc     r11,QWORD[88+rsp]
754         adc     r12,QWORD[96+rsp]
755         adc     r13,QWORD[104+rsp]
756         adc     r14,QWORD[112+rsp]
757         adc     r15,QWORD[120+rsp]
758 DB      102,72,15,126,214               ; movq rsi,xmm2 (table column; leaves CF intact)
759         sbb     rcx,rcx
760
761         call    __rsaz_512_subtract
762
; Scatter r8..r15 into the table column: low dwords first, then the
; high dwords 64 bytes further on.
763         mov     DWORD[rsi],r8d
764         shr     r8,32
765         mov     DWORD[128+rsi],r9d
766         shr     r9,32
767         mov     DWORD[256+rsi],r10d
768         shr     r10,32
769         mov     DWORD[384+rsi],r11d
770         shr     r11,32
771         mov     DWORD[512+rsi],r12d
772         shr     r12,32
773         mov     DWORD[640+rsi],r13d
774         shr     r13,32
775         mov     DWORD[768+rsi],r14d
776         shr     r14,32
777         mov     DWORD[896+rsi],r15d
778         shr     r15,32
779         mov     DWORD[64+rsi],r8d
780         mov     DWORD[192+rsi],r9d
781         mov     DWORD[320+rsi],r10d
782         mov     DWORD[448+rsi],r11d
783         mov     DWORD[576+rsi],r12d
784         mov     DWORD[704+rsi],r13d
785         mov     DWORD[832+rsi],r14d
786         mov     DWORD[960+rsi],r15d
787
; rax = stack pointer value at function entry; restore pushed registers.
788         lea     rax,[((128+24+48))+rsp]
789         mov     r15,QWORD[((-48))+rax]
790         mov     r14,QWORD[((-40))+rax]
791         mov     r13,QWORD[((-32))+rax]
792         mov     r12,QWORD[((-24))+rax]
793         mov     rbp,QWORD[((-16))+rax]
794         mov     rbx,QWORD[((-8))+rax]
795         lea     rsp,[rax]
796 $L$mul_scatter4_epilogue:
797         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
798         mov     rsi,QWORD[16+rsp]
799         DB      0F3h,0C3h               ;repret
800 $L$SEH_end_rsaz_512_mul_scatter4:
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- run __rsaz_512_reduce over an 8-qword input and
; store the eight resulting limbs (no final conditional subtraction).
; Presumably this is a Montgomery multiplication by one, as the name
; suggests -- confirm against caller.
; ABI:   Microsoft x64. The prologue copies rcx, rdx, r8, r9 into
;        rdi, rsi, rdx, rcx.
; In:    arg1 -> rdi        destination, 8 qwords
;        arg2 -> rsi        source, 8 qwords
;        arg3 -> rbp        8-qword value read by __rsaz_512_reduce
;                           (presumably the modulus -- confirm)
;        arg4 -> [128+rsp]  multiplier read by __rsaz_512_reduce
;-----------------------------------------------------------------------
801 global  rsaz_512_mul_by_one
802
803 ALIGN   32
804 rsaz_512_mul_by_one:
805         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
806         mov     QWORD[16+rsp],rsi
807         mov     rax,rsp
808 $L$SEH_begin_rsaz_512_mul_by_one:
809         mov     rdi,rcx
810         mov     rsi,rdx
811         mov     rdx,r8
812         mov     rcx,r9
813
814
815         push    rbx
816         push    rbp
817         push    r12
818         push    r13
819         push    r14
820         push    r15
821
822         sub     rsp,128+24
823 $L$mul_by_one_body:
824         mov     rbp,rdx
825         mov     QWORD[128+rsp],rcx
826
; Load the input into r8..r15, the register set the helper works on.
827         mov     r8,QWORD[rsi]
828         pxor    xmm0,xmm0
829         mov     r9,QWORD[8+rsi]
830         mov     r10,QWORD[16+rsi]
831         mov     r11,QWORD[24+rsi]
832         mov     r12,QWORD[32+rsi]
833         mov     r13,QWORD[40+rsi]
834         mov     r14,QWORD[48+rsi]
835         mov     r15,QWORD[56+rsi]
836
; Zero the first 112 bytes of the scratch area. movdqa needs rsp to be
; 16-byte aligned here, which holds when the caller followed the ABI:
; entry rsp is 8 mod 16, and 48 bytes of pushes plus 152 keep it at 0.
837         movdqa  XMMWORD[rsp],xmm0
838         movdqa  XMMWORD[16+rsp],xmm0
839         movdqa  XMMWORD[32+rsp],xmm0
840         movdqa  XMMWORD[48+rsp],xmm0
841         movdqa  XMMWORD[64+rsp],xmm0
842         movdqa  XMMWORD[80+rsp],xmm0
843         movdqa  XMMWORD[96+rsp],xmm0
844         call    __rsaz_512_reduce
845         mov     QWORD[rdi],r8
846         mov     QWORD[8+rdi],r9
847         mov     QWORD[16+rdi],r10
848         mov     QWORD[24+rdi],r11
849         mov     QWORD[32+rdi],r12
850         mov     QWORD[40+rdi],r13
851         mov     QWORD[48+rdi],r14
852         mov     QWORD[56+rdi],r15
853
; rax = stack pointer value at function entry; restore pushed registers.
854         lea     rax,[((128+24+48))+rsp]
855         mov     r15,QWORD[((-48))+rax]
856         mov     r14,QWORD[((-40))+rax]
857         mov     r13,QWORD[((-32))+rax]
858         mov     r12,QWORD[((-24))+rax]
859         mov     rbp,QWORD[((-16))+rax]
860         mov     rbx,QWORD[((-8))+rax]
861         lea     rsp,[rax]
862 $L$mul_by_one_epilogue:
863         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
864         mov     rsi,QWORD[16+rsp]
865         DB      0F3h,0C3h               ;repret
866 $L$SEH_end_rsaz_512_mul_by_one:
867
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper, private register convention.
; In:    r8..r15            eight limbs to reduce (r8 = least significant)
;        rbp                pointer to eight qwords that are multiplied in
;                           (presumably the modulus -- confirm)
;        [(128+8)+rsp]      per-step multiplier; this is the caller's
;                           [128+rsp], offset by the return address
; Out:   r8..r15            eight result limbs
; Clobb: rax, rbx, rcx, rdx, rsi, flags
; Eight passes; each pass adds rbx * [rbp..rbp+56] to the accumulator and
; drops the lowest limb, where rbx = (lowest limb) * multiplier.
;-----------------------------------------------------------------------
868 ALIGN   32
869 __rsaz_512_reduce:
870         mov     rbx,r8
871         imul    rbx,QWORD[((128+8))+rsp]
872         mov     rax,QWORD[rbp]
873         mov     ecx,8
874         jmp     NEAR $L$reduction_loop
875
876 ALIGN   32
877 $L$reduction_loop:
878         mul     rbx
879         mov     rax,QWORD[8+rbp]
; neg is used only for its flag: CF = (r8 != 0). The low limb itself is
; discarded (presumably it sums to zero by construction -- confirm), so
; only the carry into the next limb is kept.
880         neg     r8
881         mov     r8,rdx
882         adc     r8,0
883
884         mul     rbx
885         add     r9,rax
886         mov     rax,QWORD[16+rbp]
887         adc     rdx,0
888         add     r8,r9
889         mov     r9,rdx
890         adc     r9,0
891
892         mul     rbx
893         add     r10,rax
894         mov     rax,QWORD[24+rbp]
895         adc     rdx,0
896         add     r9,r10
897         mov     r10,rdx
898         adc     r10,0
899
900         mul     rbx
901         add     r11,rax
902         mov     rax,QWORD[32+rbp]
903         adc     rdx,0
904         add     r10,r11
; Start computing the next pass's rbx: rsi = multiplier (mov keeps flags).
905         mov     rsi,QWORD[((128+8))+rsp]
906
907
908         adc     rdx,0
909         mov     r11,rdx
910
911         mul     rbx
912         add     r12,rax
913         mov     rax,QWORD[40+rbp]
914         adc     rdx,0
; rsi = multiplier * new lowest limb (r8 is already final for this pass).
915         imul    rsi,r8
916         add     r11,r12
917         mov     r12,rdx
918         adc     r12,0
919
920         mul     rbx
921         add     r13,rax
922         mov     rax,QWORD[48+rbp]
923         adc     rdx,0
924         add     r12,r13
925         mov     r13,rdx
926         adc     r13,0
927
928         mul     rbx
929         add     r14,rax
930         mov     rax,QWORD[56+rbp]
931         adc     rdx,0
932         add     r13,r14
933         mov     r14,rdx
934         adc     r14,0
935
936         mul     rbx
937         mov     rbx,rsi                 ; multiplier for the next pass
938         add     r15,rax
939         mov     rax,QWORD[rbp]
940         adc     rdx,0
941         add     r14,r15
942         mov     r15,rdx
943         adc     r15,0
944
945         dec     ecx
946         jne     NEAR $L$reduction_loop
947
948         DB      0F3h,0C3h               ;repret
949
950
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper, private register convention.
; Stores r8..r15 to [rdi], then adds the masked two's complement of the
; eight qwords at [rbp], so the stored value ends up as either X or
; X - [rbp] without branching.
; In:    r8..r15  value X (r8 = least significant)
;        rdi      destination, 8 qwords
;        rbp      pointer to the 8-qword subtrahend
;        rcx      mask; callers in this file pass the result of
;                 "sbb rcx,rcx", i.e. 0 (keep X) or all-ones (subtract)
; Out:   [rdi] and r8..r15 hold the result
; Clobb: r8..r15, flags
; The negation is built as neg(limb 0) and not(limbs 1..7), which equals
; the two's complement only when limb 0 is non-zero (true for an odd
; value such as an RSA modulus -- confirm against caller).
;-----------------------------------------------------------------------
951 ALIGN   32
952 __rsaz_512_subtract:
953         mov     QWORD[rdi],r8
954         mov     QWORD[8+rdi],r9
955         mov     QWORD[16+rdi],r10
956         mov     QWORD[24+rdi],r11
957         mov     QWORD[32+rdi],r12
958         mov     QWORD[40+rdi],r13
959         mov     QWORD[48+rdi],r14
960         mov     QWORD[56+rdi],r15
961
; Build (-[rbp]) & rcx limb by limb in r8..r15.
962         mov     r8,QWORD[rbp]
963         mov     r9,QWORD[8+rbp]
964         neg     r8
965         not     r9
966         and     r8,rcx
967         mov     r10,QWORD[16+rbp]
968         and     r9,rcx
969         not     r10
970         mov     r11,QWORD[24+rbp]
971         and     r10,rcx
972         not     r11
973         mov     r12,QWORD[32+rbp]
974         and     r11,rcx
975         not     r12
976         mov     r13,QWORD[40+rbp]
977         and     r12,rcx
978         not     r13
979         mov     r14,QWORD[48+rbp]
980         and     r13,rcx
981         not     r14
982         mov     r15,QWORD[56+rbp]
983         and     r14,rcx
984         not     r15
985         and     r15,rcx
986
; Add the stored X back in with a full carry chain.
987         add     r8,QWORD[rdi]
988         adc     r9,QWORD[8+rdi]
989         adc     r10,QWORD[16+rdi]
990         adc     r11,QWORD[24+rdi]
991         adc     r12,QWORD[32+rdi]
992         adc     r13,QWORD[40+rdi]
993         adc     r14,QWORD[48+rdi]
994         adc     r15,QWORD[56+rdi]
995
996         mov     QWORD[rdi],r8
997         mov     QWORD[8+rdi],r9
998         mov     QWORD[16+rdi],r10
999         mov     QWORD[24+rdi],r11
1000         mov     QWORD[32+rdi],r12
1001         mov     QWORD[40+rdi],r13
1002         mov     QWORD[48+rdi],r14
1003         mov     QWORD[56+rdi],r15
1004
1005         DB      0F3h,0C3h               ;repret
1006
1007
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper, private register convention.
; Schoolbook 8x8-limb multiply; the 16-qword product is written to the
; caller's [rsp..rsp+127].
; In:    rsi  pointer to the first operand, 8 qwords
;        rbp  pointer to the second operand, 8 qwords
;        rbx  limb 0 of the second operand (preloaded by the caller)
; Out:   caller's [rsp..rsp+127] = product; r8..r15 = its high eight limbs
; Clobb: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
;-----------------------------------------------------------------------
1008 ALIGN   32
1009 __rsaz_512_mul:
; [8+rsp] skips the return address, i.e. rdi = caller's rsp.
1010         lea     rdi,[8+rsp]
1011
; First row: operand * limb 0 -> [rdi], r8..r15.
1012         mov     rax,QWORD[rsi]
1013         mul     rbx
1014         mov     QWORD[rdi],rax
1015         mov     rax,QWORD[8+rsi]
1016         mov     r8,rdx
1017
1018         mul     rbx
1019         add     r8,rax
1020         mov     rax,QWORD[16+rsi]
1021         mov     r9,rdx
1022         adc     r9,0
1023
1024         mul     rbx
1025         add     r9,rax
1026         mov     rax,QWORD[24+rsi]
1027         mov     r10,rdx
1028         adc     r10,0
1029
1030         mul     rbx
1031         add     r10,rax
1032         mov     rax,QWORD[32+rsi]
1033         mov     r11,rdx
1034         adc     r11,0
1035
1036         mul     rbx
1037         add     r11,rax
1038         mov     rax,QWORD[40+rsi]
1039         mov     r12,rdx
1040         adc     r12,0
1041
1042         mul     rbx
1043         add     r12,rax
1044         mov     rax,QWORD[48+rsi]
1045         mov     r13,rdx
1046         adc     r13,0
1047
1048         mul     rbx
1049         add     r13,rax
1050         mov     rax,QWORD[56+rsi]
1051         mov     r14,rdx
1052         adc     r14,0
1053
1054         mul     rbx
1055         add     r14,rax
1056         mov     rax,QWORD[rsi]
1057         mov     r15,rdx
1058         adc     r15,0
1059
; Advance to limb 1 of the second operand and slot 1 of the product.
1060         lea     rbp,[8+rbp]
1061         lea     rdi,[8+rdi]
1062
1063         mov     ecx,7
1064         jmp     NEAR $L$oop_mul
1065
1066 ALIGN   32
1067 $L$oop_mul:
; One row per pass: add operand * [rbp] into r8..r15 and store the
; finished low limb at [rdi]. Runs seven times (ecx).
1068         mov     rbx,QWORD[rbp]
1069         mul     rbx
1070         add     r8,rax
1071         mov     rax,QWORD[8+rsi]
1072         mov     QWORD[rdi],r8
1073         mov     r8,rdx
1074         adc     r8,0
1075
1076         mul     rbx
1077         add     r9,rax
1078         mov     rax,QWORD[16+rsi]
1079         adc     rdx,0
1080         add     r8,r9
1081         mov     r9,rdx
1082         adc     r9,0
1083
1084         mul     rbx
1085         add     r10,rax
1086         mov     rax,QWORD[24+rsi]
1087         adc     rdx,0
1088         add     r9,r10
1089         mov     r10,rdx
1090         adc     r10,0
1091
1092         mul     rbx
1093         add     r11,rax
1094         mov     rax,QWORD[32+rsi]
1095         adc     rdx,0
1096         add     r10,r11
1097         mov     r11,rdx
1098         adc     r11,0
1099
1100         mul     rbx
1101         add     r12,rax
1102         mov     rax,QWORD[40+rsi]
1103         adc     rdx,0
1104         add     r11,r12
1105         mov     r12,rdx
1106         adc     r12,0
1107
1108         mul     rbx
1109         add     r13,rax
1110         mov     rax,QWORD[48+rsi]
1111         adc     rdx,0
1112         add     r12,r13
1113         mov     r13,rdx
1114         adc     r13,0
1115
1116         mul     rbx
1117         add     r14,rax
1118         mov     rax,QWORD[56+rsi]
1119         adc     rdx,0
1120         add     r13,r14
1121         mov     r14,rdx
; lea advances the operand pointer without disturbing CF for the adc below.
1122         lea     rbp,[8+rbp]
1123         adc     r14,0
1124
1125         mul     rbx
1126         add     r15,rax
1127         mov     rax,QWORD[rsi]
1128         adc     rdx,0
1129         add     r14,r15
1130         mov     r15,rdx
1131         adc     r15,0
1132
1133         lea     rdi,[8+rdi]
1134
1135         dec     ecx
1136         jnz     NEAR $L$oop_mul
1137
; Store the remaining eight high limbs of the product.
1138         mov     QWORD[rdi],r8
1139         mov     QWORD[8+rdi],r9
1140         mov     QWORD[16+rdi],r10
1141         mov     QWORD[24+rdi],r11
1142         mov     QWORD[32+rdi],r12
1143         mov     QWORD[40+rdi],r13
1144         mov     QWORD[48+rdi],r14
1145         mov     QWORD[56+rdi],r15
1146
1147         DB      0F3h,0C3h               ;repret
1148
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- store one 8-qword value into a column of the
; interleaved table.
; ABI:   Microsoft x64, leaf; arguments are used in place, no stack use.
; In:    rcx = table base
;        rdx = source, 8 qwords
;        r8d = column index (32-bit argument)
; Out:   for limb i: low dword  -> [table + 4*index + 128*i]
;                    high dword -> [table + 4*index + 128*i + 64]
; Clobb: rax, rcx, rdx, r8, r9, flags
;-----------------------------------------------------------------------
1149 global  rsaz_512_scatter4
1150
1151 ALIGN   16
1152 rsaz_512_scatter4:
; The index is a 32-bit argument, so the upper half of r8 is undefined
; under the Microsoft x64 convention; zero-extend it before it is used in
; address arithmetic (the *_gather4/*_scatter4 multiply routines do the
; same with "mov r9d,r9d").
             mov     r8d,r8d
1153         lea     rcx,[r8*4+rcx]          ; rcx -> first dword of the column
1154         mov     r9d,8                   ; eight limbs to store
1155         jmp     NEAR $L$oop_scatter
1156 ALIGN   16
1157 $L$oop_scatter:
1158         mov     rax,QWORD[rdx]
1159         lea     rdx,[8+rdx]
1160         mov     DWORD[rcx],eax          ; low half
1161         shr     rax,32
1162         mov     DWORD[64+rcx],eax       ; high half, 64 bytes further on
1163         lea     rcx,[128+rcx]           ; next limb row
1164         dec     r9d
1165         jnz     NEAR $L$oop_scatter
1166         DB      0F3h,0C3h               ;repret
1167
1168
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- read one 8-qword value out of a column of the
; interleaved table.
; ABI:   Microsoft x64, leaf; arguments are used in place, no stack use.
; In:    rcx = destination, 8 qwords
;        rdx = table base
;        r8d = column index (32-bit argument)
; Out:   destination limb i = dword [table + 4*index + 128*i] (low half)
;        combined with dword [table + 4*index + 128*i + 64] (high half)
; Clobb: rax, rcx, rdx, r8, r9, flags
; NOTE(review): the load addresses depend on the index; if the index is
; secret this is a potential cache-timing concern -- confirm against the
; caller and upstream.
;-----------------------------------------------------------------------
1169 global  rsaz_512_gather4
1170
1171 ALIGN   16
1172 rsaz_512_gather4:
; The index is a 32-bit argument, so the upper half of r8 is undefined
; under the Microsoft x64 convention; zero-extend it before it is used in
; address arithmetic (the *_gather4/*_scatter4 multiply routines do the
; same with "mov r9d,r9d").
             mov     r8d,r8d
1173         lea     rdx,[r8*4+rdx]          ; rdx -> first dword of the column
1174         mov     r9d,8                   ; eight limbs to load
1175         jmp     NEAR $L$oop_gather
1176 ALIGN   16
1177 $L$oop_gather:
1178         mov     eax,DWORD[rdx]          ; low half (zero-extends into rax)
1179         mov     r8d,DWORD[64+rdx]       ; high half; r8 is free after the lea
1180         lea     rdx,[128+rdx]           ; next limb row
1181         shl     r8,32
1182         or      rax,r8
1183         mov     QWORD[rcx],rax
1184         lea     rcx,[8+rcx]
1185         dec     r9d
1186         jnz     NEAR $L$oop_gather
1187         DB      0F3h,0C3h               ;repret
1188
; se_handler -- exception handler named by every $L$SEH_info_* record in the
; .xdata section of this file.
;
; The routine reads two pointers:
;   r8 = context record of the interrupted code   (presumed Win64 CONTEXT)
;   r9 = dispatcher context for this frame        (presumed DISPATCHER_CONTEXT)
; Field names in the comments below are the members presumed to live at the
; given offsets -- verify against winnt.h before relying on them.
;
; What it does:
;   1. Works out the value rsp had on entry to the interrupted function:
;        - fault before the "body" label : taken from the saved rax (the
;          prologue at the top of this file does `mov rax,rsp` before pushing)
;        - fault at/after the "epilogue" label : the saved rsp is used as is
;        - otherwise (inside the body) : saved rsp + 128+24+48, i.e. the
;          `sub rsp,128+24` locals plus six 8-byte pushes, and the six
;          pushed registers rbx,rbp,r12-r15 are reloaded from just below it
;          and written back into the context record.
;      The body/epilogue bounds are two 32-bit image-relative offsets read
;      through [r9+56]; they correspond to the pair of DDs that follows the
;      handler RVA in each .xdata record.
;   2. Restores rsp/rsi/rdi in the context record (rdi and rsi were parked at
;      [entry_rsp+8] and [entry_rsp+16] by the WIN64 prologue).
;   3. Copies the whole context record (154 qwords) to the buffer at [r9+40].
;   4. Calls RtlVirtualUnwind and returns 1 in eax.
1189 EXTERN  __imp_RtlVirtualUnwind
1190
1191 ALIGN   16
1192 se_handler:
1193         push    rsi                     ; preserve every register this handler touches
1194         push    rdi
1195         push    rbx
1196         push    rbp
1197         push    r12
1198         push    r13
1199         push    r14
1200         push    r15
1201         pushfq                          ; flags too: a raw-encoded cld is executed below
1202         sub     rsp,64                  ; room for the call below; [32..56+rsp] hold its stack arguments
1203
1204         mov     rax,QWORD[120+r8]       ; rax = context->Rax (entry rsp while still in the prologue)
1205         mov     rbx,QWORD[248+r8]       ; rbx = context->Rip (address being unwound)
1206
1207         mov     rsi,QWORD[8+r9]         ; rsi = disp->ImageBase
1208         mov     r11,QWORD[56+r9]        ; r11 = disp->HandlerData (-> body/epilogue offsets)
1209
1210         mov     r10d,DWORD[r11]         ; first offset: start of function body
1211         lea     r10,[r10*1+rsi]         ; make it an absolute address
1212         cmp     rbx,r10
1213         jb      NEAR $L$common_seh_tail ; Rip before the body label: rax already holds entry rsp
1214
1215         mov     rax,QWORD[152+r8]       ; rax = context->Rsp
1216
1217         mov     r10d,DWORD[4+r11]       ; second offset: epilogue label
1218         lea     r10,[r10*1+rsi]         ; make it an absolute address
1219         cmp     rbx,r10
1220         jae     NEAR $L$common_seh_tail ; Rip at/after the epilogue label: saved rsp used unchanged
1221
1222         lea     rax,[((128+24+48))+rax] ; inside the body: skip locals (128+24) and 6 pushes (48)
1223
1224         mov     rbx,QWORD[((-8))+rax]   ; reload the six registers pushed by the prologue
1225         mov     rbp,QWORD[((-16))+rax]
1226         mov     r12,QWORD[((-24))+rax]
1227         mov     r13,QWORD[((-32))+rax]
1228         mov     r14,QWORD[((-40))+rax]
1229         mov     r15,QWORD[((-48))+rax]
1230         mov     QWORD[144+r8],rbx       ; ...and write them back: context->Rbx
1231         mov     QWORD[160+r8],rbp       ; context->Rbp
1232         mov     QWORD[216+r8],r12       ; context->R12
1233         mov     QWORD[224+r8],r13       ; context->R13
1234         mov     QWORD[232+r8],r14       ; context->R14
1235         mov     QWORD[240+r8],r15       ; context->R15
1236
1237 $L$common_seh_tail:
1238         mov     rdi,QWORD[8+rax]        ; rdi/rsi as parked by the WIN64 prologue at 8/16 above entry rsp
1239         mov     rsi,QWORD[16+rax]
1240         mov     QWORD[152+r8],rax       ; context->Rsp = rsp at function entry
1241         mov     QWORD[168+r8],rsi       ; context->Rsi
1242         mov     QWORD[176+r8],rdi       ; context->Rdi
1243
1244         mov     rdi,QWORD[40+r9]        ; destination: disp->ContextRecord
1245         mov     rsi,r8                  ; source: the context record just patched
1246         mov     ecx,154                 ; 154 qwords = 1232 bytes to copy
1247         DD      0xa548f3fc              ; raw bytes FC F3 48 A5 = cld ; rep movsq
1248
1249         mov     rsi,r9                  ; rsi = dispatcher context (r9 is reloaded below)
1250         xor     rcx,rcx                 ; arg1 = 0 (presumably UNW_FLAG_NHANDLER)
1251         mov     rdx,QWORD[8+rsi]        ; arg2 = disp->ImageBase
1252         mov     r8,QWORD[rsi]           ; arg3 = disp->ControlPc
1253         mov     r9,QWORD[16+rsi]        ; arg4 = disp->FunctionEntry
1254         mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
1255         lea     r11,[56+rsi]            ; &disp->HandlerData
1256         lea     r12,[24+rsi]            ; &disp->EstablisherFrame
1257         mov     QWORD[32+rsp],r10       ; arg5 (first stack slot above the 32-byte shadow area)
1258         mov     QWORD[40+rsp],r11       ; arg6
1259         mov     QWORD[48+rsp],r12       ; arg7
1260         mov     QWORD[56+rsp],rcx       ; arg8 = 0
1261         call    QWORD[__imp_RtlVirtualUnwind]
1262
1263         mov     eax,1                   ; return value 1 (presumably ExceptionContinueSearch)
1264         add     rsp,64                  ; drop the call area
1265         popfq                           ; restore flags and registers in reverse push order
1266         pop     r15
1267         pop     r14
1268         pop     r13
1269         pop     r12
1270         pop     rbp
1271         pop     rbx
1272         pop     rdi
1273         pop     rsi
1274         DB      0F3h,0C3h               ; rep ret, emitted as raw bytes F3 C3
1275
1276
; .pdata -- one entry per function covered by structured exception handling.
; Each entry is three image-relative DWORDs (`wrt ..imagebase`):
;   begin-of-function RVA, end-of-function RVA, RVA of its .xdata record.
; This matches the shape of a Win64 RUNTIME_FUNCTION entry. The entries are
; listed in the same order as the $L$SEH_info_* records in .xdata.
; NOTE(review): the Win64 function table is expected to be sorted by begin
; address; that holds only if the functions appear in .text in this same
; order -- confirm against the part of the file not shown here.
1277 section .pdata rdata align=4
1278 ALIGN   4
1279         DD      $L$SEH_begin_rsaz_512_sqr wrt ..imagebase      ; rsaz_512_sqr: begin
1280         DD      $L$SEH_end_rsaz_512_sqr wrt ..imagebase        ; end
1281         DD      $L$SEH_info_rsaz_512_sqr wrt ..imagebase       ; unwind info
1282
1283         DD      $L$SEH_begin_rsaz_512_mul wrt ..imagebase      ; rsaz_512_mul: begin
1284         DD      $L$SEH_end_rsaz_512_mul wrt ..imagebase        ; end
1285         DD      $L$SEH_info_rsaz_512_mul wrt ..imagebase       ; unwind info
1286
1287         DD      $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase      ; rsaz_512_mul_gather4: begin
1288         DD      $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase        ; end
1289         DD      $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase       ; unwind info
1290
1291         DD      $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase     ; rsaz_512_mul_scatter4: begin
1292         DD      $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase       ; end
1293         DD      $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase      ; unwind info
1294
1295         DD      $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase       ; rsaz_512_mul_by_one: begin
1296         DD      $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase         ; end
1297         DD      $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase        ; unwind info
1298
; .xdata -- one unwind-info record per function listed in .pdata.
; Every record has the same three parts:
;   DB 9,0,0,0      header bytes; 9 is presumably version 1 with the
;                   "has exception handler" flag (1 | 1<<3), followed by a
;                   zero prologue size, zero unwind-code count and no frame
;                   register -- verify against the UNWIND_INFO definition
;   DD handler      image-relative address of se_handler
;   DD body,epilog  two image-relative labels; se_handler reads a pair of
;                   32-bit offsets through [r9+56] and compares the faulting
;                   address against them, which corresponds to this pair
; No unwind codes are emitted: restoring the frame is done entirely by
; se_handler from the body/epilogue bounds.
1299 section .xdata rdata align=8
1300 ALIGN   8
1301 $L$SEH_info_rsaz_512_sqr:
1302 DB      9,0,0,0
1303         DD      se_handler wrt ..imagebase
1304         DD      $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase     ; [body, epilogue) bounds
1305 $L$SEH_info_rsaz_512_mul:
1306 DB      9,0,0,0
1307         DD      se_handler wrt ..imagebase
1308         DD      $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase     ; [body, epilogue) bounds
1309 $L$SEH_info_rsaz_512_mul_gather4:
1310 DB      9,0,0,0
1311         DD      se_handler wrt ..imagebase
1312         DD      $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase     ; [body, epilogue) bounds
1313 $L$SEH_info_rsaz_512_mul_scatter4:
1314 DB      9,0,0,0
1315         DD      se_handler wrt ..imagebase
1316         DD      $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase   ; [body, epilogue) bounds
1317 $L$SEH_info_rsaz_512_mul_by_one:
1318 DB      9,0,0,0
1319         DD      se_handler wrt ..imagebase
1320         DD      $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase       ; [body, epilogue) bounds