Upstream version 11.40.277.0
[platform/framework/web/crosswalk.git] / src / third_party / boringssl / win-x86_64 / crypto / bn / x86_64-mont.asm
1 default rel
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7
8 EXTERN  OPENSSL_ia32cap_P
9
;-----------------------------------------------------------------------
; bn_mul_mont -- exported Win64 entry point; word-at-a-time Montgomery
; multiplication plus the dispatcher into the 4x and 8x-squaring paths.
; presumably int bn_mul_mont(rp, ap, bp, np, n0, num) -- confirm against
; the C prototype; the roles below are what the code itself shows.
; ABI:  Microsoft x64. Arguments arrive in rcx, rdx, r8, r9, [rsp+40],
;       [rsp+48] and are remapped below into rdi, rsi, rdx, rcx, r8, r9.
; After the remap:
;       rdi = result vector (written by $L$sub / $L$copy)
;       rsi = first operand vector
;       rdx = second operand vector (kept in r12)
;       rcx = modulus vector (multiplied by m, subtracted at the end)
;       r8  = pointer to the n0 word (dereferenced once in the body)
;       r9  = word count; only the low 32 bits are used
; Out:  rax = 1
; Frame: num+2 qwords at the 1024-byte aligned rsp: tp[0..num-1] temp
;       vector, tp[num] top carry word, then the pre-allocation rsp.
;-----------------------------------------------------------------------
10 global  bn_mul_mont
11
12 ALIGN   16
13 bn_mul_mont:
14         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
15         mov     QWORD[16+rsp],rsi       ; rdi/rsi are callee-saved on Win64; parked in the caller's home space
16         mov     rax,rsp                 ; rax = rsp at entry (mul_handler reads it from context->Rax)
17 $L$SEH_begin_bn_mul_mont:
18         mov     rdi,rcx                 ; arg1
19         mov     rsi,rdx                 ; arg2
20         mov     rdx,r8                  ; arg3
21         mov     rcx,r9                  ; arg4
22         mov     r8,QWORD[40+rsp]        ; arg5 (stack)
23         mov     r9,QWORD[48+rsp]        ; arg6 (stack)
24
25
26         test    r9d,3
27         jnz     NEAR $L$mul_enter       ; num not a multiple of 4 -> generic loop
28         cmp     r9d,8
29         jb      NEAR $L$mul_enter       ; num < 8 -> generic loop
30         cmp     rdx,rsi
31         jne     NEAR $L$mul4x_enter     ; operand pointers differ -> 4x-unrolled multiply
32         test    r9d,7
33         jz      NEAR $L$sqr8x_enter     ; same pointer and num multiple of 8 -> squaring path
34         jmp     NEAR $L$mul4x_enter
35
36 ALIGN   16
37 $L$mul_enter:
38         push    rbx
39         push    rbp
40         push    r12
41         push    r13
42         push    r14
43         push    r15
44
45         mov     r9d,r9d                 ; zero-extend num to 64 bits
46         lea     r10,[2+r9]
47         mov     r11,rsp                 ; r11 = rsp after the six pushes
48         neg     r10
49         lea     rsp,[r10*8+rsp]         ; reserve (num+2) qwords
50         and     rsp,-1024               ; round the frame down to a 1024-byte boundary
51
52         mov     QWORD[8+r9*8+rsp],r11   ; tp[num+1] = saved rsp (epilogue and mul_handler read this slot)
53 $L$mul_body:
54         mov     r12,rdx                 ; r12 = second operand pointer (rdx is clobbered by mul)
55         mov     r8,QWORD[r8]            ; r8 = n0 value
56         mov     rbx,QWORD[r12]          ; rbx = bp[0]
57         mov     rax,QWORD[rsi]          ; rax = ap[0]
58
59         xor     r14,r14                 ; r14 = i (outer index)
60         xor     r15,r15                 ; r15 = j (inner index)
61
62         mov     rbp,r8
63         mul     rbx                     ; rdx:rax = ap[0]*bp[0]
64         mov     r10,rax
65         mov     rax,QWORD[rcx]          ; rax = np[0]
66
67         imul    rbp,r10                 ; rbp = m = low word * n0
68         mov     r11,rdx
69
70         mul     rbp                     ; rdx:rax = np[0]*m
71         add     r10,rax                 ; low word is not stored; only the carry is kept
72         mov     rax,QWORD[8+rsi]        ; rax = ap[1]
73         adc     rdx,0
74         mov     r13,rdx
75
76         lea     r15,[1+r15]             ; j = 1
77         jmp     NEAR $L$1st_enter
78
79 ALIGN   16
80 $L$1st:                                 ; first pass: tp = (ap*bp[0] + np*m) shifted down one word
81         add     r13,rax
82         mov     rax,QWORD[r15*8+rsi]    ; rax = ap[j]
83         adc     rdx,0
84         add     r13,r11
85         mov     r11,r10
86         adc     rdx,0
87         mov     QWORD[((-16))+r15*8+rsp],r13   ; tp[j-2] = r13
88         mov     r13,rdx
89
90 $L$1st_enter:
91         mul     rbx                     ; ap[j]*bp[0]
92         add     r11,rax
93         mov     rax,QWORD[r15*8+rcx]    ; rax = np[j]
94         adc     rdx,0
95         lea     r15,[1+r15]             ; j++ (lea leaves flags alone)
96         mov     r10,rdx
97
98         mul     rbp                     ; np[j]*m
99         cmp     r15,r9
100         jne     NEAR $L$1st
101
102         add     r13,rax
103         mov     rax,QWORD[rsi]          ; reload ap[0] for the next outer pass
104         adc     rdx,0
105         add     r13,r11
106         adc     rdx,0
107         mov     QWORD[((-16))+r15*8+rsp],r13
108         mov     r13,rdx
109         mov     r11,r10
110
111         xor     rdx,rdx
112         add     r13,r11
113         adc     rdx,0
114         mov     QWORD[((-8))+r9*8+rsp],r13     ; tp[num-1]
115         mov     QWORD[r9*8+rsp],rdx            ; tp[num] = top carry
116
117         lea     r14,[1+r14]             ; i = 1
118         jmp     NEAR $L$outer
119 ALIGN   16
120 $L$outer:                               ; one pass per remaining word of the second operand
121         mov     rbx,QWORD[r14*8+r12]    ; rbx = bp[i]
122         xor     r15,r15                 ; j = 0
123         mov     rbp,r8                  ; rbp = n0
124         mov     r10,QWORD[rsp]          ; r10 = tp[0]
125         mul     rbx                     ; ap[0]*bp[i] (rax still holds ap[0])
126         add     r10,rax
127         mov     rax,QWORD[rcx]          ; rax = np[0]
128         adc     rdx,0
129
130         imul    rbp,r10                 ; m for this pass
131         mov     r11,rdx
132
133         mul     rbp                     ; np[0]*m
134         add     r10,rax                 ; low word is not stored; only the carry is kept
135         mov     rax,QWORD[8+rsi]        ; rax = ap[1]
136         adc     rdx,0
137         mov     r10,QWORD[8+rsp]        ; r10 = tp[1]
138         mov     r13,rdx
139
140         lea     r15,[1+r15]             ; j = 1
141         jmp     NEAR $L$inner_enter
142
143 ALIGN   16
144 $L$inner:
145         add     r13,rax
146         mov     rax,QWORD[r15*8+rsi]    ; rax = ap[j]
147         adc     rdx,0
148         add     r13,r10
149         mov     r10,QWORD[r15*8+rsp]    ; r10 = tp[j]
150         adc     rdx,0
151         mov     QWORD[((-16))+r15*8+rsp],r13   ; tp[j-2] = r13
152         mov     r13,rdx
153
154 $L$inner_enter:
155         mul     rbx                     ; ap[j]*bp[i]
156         add     r11,rax
157         mov     rax,QWORD[r15*8+rcx]    ; rax = np[j]
158         adc     rdx,0
159         add     r10,r11                 ; fold in the previous tp word
160         mov     r11,rdx
161         adc     r11,0
162         lea     r15,[1+r15]             ; j++
163
164         mul     rbp                     ; np[j]*m
165         cmp     r15,r9
166         jne     NEAR $L$inner
167
168         add     r13,rax
169         mov     rax,QWORD[rsi]          ; reload ap[0] for the next outer pass
170         adc     rdx,0
171         add     r13,r10
172         mov     r10,QWORD[r15*8+rsp]    ; r10 = tp[num] (previous top carry)
173         adc     rdx,0
174         mov     QWORD[((-16))+r15*8+rsp],r13
175         mov     r13,rdx
176
177         xor     rdx,rdx
178         add     r13,r11
179         adc     rdx,0
180         add     r13,r10
181         adc     rdx,0
182         mov     QWORD[((-8))+r9*8+rsp],r13     ; tp[num-1]
183         mov     QWORD[r9*8+rsp],rdx            ; tp[num] = top carry
184
185         lea     r14,[1+r14]             ; i++
186         cmp     r14,r9
187         jb      NEAR $L$outer
188
189         xor     r14,r14                 ; i = 0; xor also clears CF for the first sbb
190         mov     rax,QWORD[rsp]          ; rax = tp[0]
191         lea     rsi,[rsp]               ; rsi = tp
192         mov     r15,r9                  ; r15 = words remaining
193         jmp     NEAR $L$sub
194 ALIGN   16
195 $L$sub: sbb     rax,QWORD[r14*8+rcx]    ; rp[i] = tp[i] - np[i] - borrow
196         mov     QWORD[r14*8+rdi],rax
197         mov     rax,QWORD[8+r14*8+rsi]  ; rax = tp[i+1]; after the last pass this is tp[num]
198         lea     r14,[1+r14]             ; lea/dec keep CF alive across iterations
199         dec     r15
200         jnz     NEAR $L$sub
201
202         sbb     rax,0                   ; rax = tp[num] - borrow, used as the select mask below (expected 0 or all-ones)
203         xor     r14,r14
204         mov     r15,r9
205 ALIGN   16
206 $L$copy:                                ; branch-free select between tp[i] and rp[i], then overwrite tp[i]
207         mov     rsi,QWORD[r14*8+rsp]    ; rsi = tp[i]
208         mov     rcx,QWORD[r14*8+rdi]    ; rcx = rp[i] (tp - np from $L$sub)
209         xor     rsi,rcx
210         and     rsi,rax
211         xor     rsi,rcx                 ; rsi = mask ? tp[i] : rp[i]
212         mov     QWORD[r14*8+rsp],r14    ; overwrite the temp word with the loop index
213         mov     QWORD[r14*8+rdi],rsi
214         lea     r14,[1+r14]
215         sub     r15,1
216         jnz     NEAR $L$copy
217
218         mov     rsi,QWORD[8+r9*8+rsp]   ; rsi = saved rsp (points at the six pushed registers)
219         mov     rax,1                   ; return value
220         mov     r15,QWORD[rsi]
221         mov     r14,QWORD[8+rsi]
222         mov     r13,QWORD[16+rsi]
223         mov     r12,QWORD[24+rsi]
224         mov     rbp,QWORD[32+rsi]
225         mov     rbx,QWORD[40+rsi]
226         lea     rsp,[48+rsi]            ; rsp back to its value at function entry
227 $L$mul_epilogue:
228         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
229         mov     rsi,QWORD[16+rsp]
230         DB      0F3h,0C3h               ;repret
231 $L$SEH_end_bn_mul_mont:
232
;-----------------------------------------------------------------------
; bn_mul4x_mont -- Montgomery multiplication with the inner loops
; unrolled four words per iteration. Not declared global in this file;
; bn_mul_mont jumps to $L$mul4x_enter with the arguments already
; remapped (rdi, rsi, rdx, rcx, r8, r9 as in bn_mul_mont).
; ABI:  Microsoft x64 (same argument remap as bn_mul_mont).
; Out:  rax = 1
; Frame: num+4 qwords at the 1024-byte aligned rsp: tp[0..num-1],
;       tp[num] top carry, saved rsp at [8+num*8], result pointer at
;       [16+num*8] (rdi doubles as an accumulator inside the loops).
; The dispatcher in bn_mul_mont only sends num >= 8 and num % 4 == 0.
;-----------------------------------------------------------------------
233 ALIGN   16
234 bn_mul4x_mont:
235         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
236         mov     QWORD[16+rsp],rsi
237         mov     rax,rsp
238 $L$SEH_begin_bn_mul4x_mont:
239         mov     rdi,rcx
240         mov     rsi,rdx
241         mov     rdx,r8
242         mov     rcx,r9
243         mov     r8,QWORD[40+rsp]
244         mov     r9,QWORD[48+rsp]
245
246
247 $L$mul4x_enter:
248         push    rbx
249         push    rbp
250         push    r12
251         push    r13
252         push    r14
253         push    r15
254
255         mov     r9d,r9d                 ; zero-extend num to 64 bits
256         lea     r10,[4+r9]
257         mov     r11,rsp                 ; r11 = rsp after the six pushes
258         neg     r10
259         lea     rsp,[r10*8+rsp]         ; reserve (num+4) qwords
260         and     rsp,-1024               ; round the frame down to a 1024-byte boundary
261
262         mov     QWORD[8+r9*8+rsp],r11   ; saved rsp (epilogue and mul_handler read this slot)
263 $L$mul4x_body:
264         mov     QWORD[16+r9*8+rsp],rdi  ; stash the result pointer; rdi is reused as an accumulator
265         mov     r12,rdx                 ; r12 = second operand pointer
266         mov     r8,QWORD[r8]            ; r8 = n0 value
267         mov     rbx,QWORD[r12]          ; rbx = bp[0]
268         mov     rax,QWORD[rsi]          ; rax = ap[0]
269
270         xor     r14,r14                 ; r14 = i (outer index)
271         xor     r15,r15                 ; r15 = j (inner index)
272
273         mov     rbp,r8
274         mul     rbx                     ; ap[0]*bp[0]
275         mov     r10,rax
276         mov     rax,QWORD[rcx]          ; rax = np[0]
277
278         imul    rbp,r10                 ; rbp = m = low word * n0
279         mov     r11,rdx
280
281         mul     rbp                     ; np[0]*m
282         add     r10,rax                 ; low word is not stored; only the carry is kept
283         mov     rax,QWORD[8+rsi]        ; rax = ap[1]
284         adc     rdx,0
285         mov     rdi,rdx
286
287         mul     rbx                     ; ap[1]*bp[0]
288         add     r11,rax
289         mov     rax,QWORD[8+rcx]        ; rax = np[1]
290         adc     rdx,0
291         mov     r10,rdx
292
293         mul     rbp                     ; np[1]*m
294         add     rdi,rax
295         mov     rax,QWORD[16+rsi]       ; rax = ap[2]
296         adc     rdx,0
297         add     rdi,r11
298         lea     r15,[4+r15]             ; j = 4
299         adc     rdx,0
300         mov     QWORD[rsp],rdi          ; tp[0]
301         mov     r13,rdx
302         jmp     NEAR $L$1st4x
303 ALIGN   16
304 $L$1st4x:                               ; first pass, four words per iteration; each mul rbx / mul rbp pair handles one word
305         mul     rbx                     ; ap[j-2]*bp[0]
306         add     r10,rax
307         mov     rax,QWORD[((-16))+r15*8+rcx]
308         adc     rdx,0
309         mov     r11,rdx
310
311         mul     rbp                     ; np[j-2]*m
312         add     r13,rax
313         mov     rax,QWORD[((-8))+r15*8+rsi]
314         adc     rdx,0
315         add     r13,r10
316         adc     rdx,0
317         mov     QWORD[((-24))+r15*8+rsp],r13   ; tp[j-3]
318         mov     rdi,rdx
319
320         mul     rbx
321         add     r11,rax
322         mov     rax,QWORD[((-8))+r15*8+rcx]
323         adc     rdx,0
324         mov     r10,rdx
325
326         mul     rbp
327         add     rdi,rax
328         mov     rax,QWORD[r15*8+rsi]
329         adc     rdx,0
330         add     rdi,r11
331         adc     rdx,0
332         mov     QWORD[((-16))+r15*8+rsp],rdi   ; tp[j-2]
333         mov     r13,rdx
334
335         mul     rbx
336         add     r10,rax
337         mov     rax,QWORD[r15*8+rcx]
338         adc     rdx,0
339         mov     r11,rdx
340
341         mul     rbp
342         add     r13,rax
343         mov     rax,QWORD[8+r15*8+rsi]
344         adc     rdx,0
345         add     r13,r10
346         adc     rdx,0
347         mov     QWORD[((-8))+r15*8+rsp],r13    ; tp[j-1]
348         mov     rdi,rdx
349
350         mul     rbx
351         add     r11,rax
352         mov     rax,QWORD[8+r15*8+rcx]
353         adc     rdx,0
354         lea     r15,[4+r15]             ; j += 4; the offsets below are relative to the new j
355         mov     r10,rdx
356
357         mul     rbp
358         add     rdi,rax
359         mov     rax,QWORD[((-16))+r15*8+rsi]
360         adc     rdx,0
361         add     rdi,r11
362         adc     rdx,0
363         mov     QWORD[((-32))+r15*8+rsp],rdi
364         mov     r13,rdx
365         cmp     r15,r9
366         jb      NEAR $L$1st4x
367
368         mul     rbx                     ; tail of the first pass: the last two words
369         add     r10,rax
370         mov     rax,QWORD[((-16))+r15*8+rcx]
371         adc     rdx,0
372         mov     r11,rdx
373
374         mul     rbp
375         add     r13,rax
376         mov     rax,QWORD[((-8))+r15*8+rsi]
377         adc     rdx,0
378         add     r13,r10
379         adc     rdx,0
380         mov     QWORD[((-24))+r15*8+rsp],r13
381         mov     rdi,rdx
382
383         mul     rbx
384         add     r11,rax
385         mov     rax,QWORD[((-8))+r15*8+rcx]
386         adc     rdx,0
387         mov     r10,rdx
388
389         mul     rbp
390         add     rdi,rax
391         mov     rax,QWORD[rsi]          ; reload ap[0] for the next outer pass
392         adc     rdx,0
393         add     rdi,r11
394         adc     rdx,0
395         mov     QWORD[((-16))+r15*8+rsp],rdi
396         mov     r13,rdx
397
398         xor     rdi,rdi
399         add     r13,r10
400         adc     rdi,0
401         mov     QWORD[((-8))+r15*8+rsp],r13    ; tp[num-1]
402         mov     QWORD[r15*8+rsp],rdi           ; tp[num] = top carry
403
404         lea     r14,[1+r14]             ; i = 1
405 ALIGN   4
406 $L$outer4x:                             ; one pass per remaining word of the second operand
407         mov     rbx,QWORD[r14*8+r12]    ; rbx = bp[i]
408         xor     r15,r15                 ; j = 0
409         mov     r10,QWORD[rsp]          ; r10 = tp[0]
410         mov     rbp,r8                  ; rbp = n0
411         mul     rbx                     ; ap[0]*bp[i] (rax still holds ap[0])
412         add     r10,rax
413         mov     rax,QWORD[rcx]          ; rax = np[0]
414         adc     rdx,0
415
416         imul    rbp,r10                 ; m for this pass
417         mov     r11,rdx
418
419         mul     rbp                     ; np[0]*m
420         add     r10,rax                 ; low word is not stored; only the carry is kept
421         mov     rax,QWORD[8+rsi]
422         adc     rdx,0
423         mov     rdi,rdx
424
425         mul     rbx                     ; ap[1]*bp[i]
426         add     r11,rax
427         mov     rax,QWORD[8+rcx]
428         adc     rdx,0
429         add     r11,QWORD[8+rsp]        ; + tp[1]
430         adc     rdx,0
431         mov     r10,rdx
432
433         mul     rbp                     ; np[1]*m
434         add     rdi,rax
435         mov     rax,QWORD[16+rsi]
436         adc     rdx,0
437         add     rdi,r11
438         lea     r15,[4+r15]             ; j = 4
439         adc     rdx,0
440         mov     QWORD[rsp],rdi          ; tp[0]
441         mov     r13,rdx
442         jmp     NEAR $L$inner4x
443 ALIGN   16
444 $L$inner4x:                             ; same shape as $L$1st4x, plus the previous tp word added to each product
445         mul     rbx
446         add     r10,rax
447         mov     rax,QWORD[((-16))+r15*8+rcx]
448         adc     rdx,0
449         add     r10,QWORD[((-16))+r15*8+rsp]   ; + tp[j-2]
450         adc     rdx,0
451         mov     r11,rdx
452
453         mul     rbp
454         add     r13,rax
455         mov     rax,QWORD[((-8))+r15*8+rsi]
456         adc     rdx,0
457         add     r13,r10
458         adc     rdx,0
459         mov     QWORD[((-24))+r15*8+rsp],r13   ; tp[j-3]
460         mov     rdi,rdx
461
462         mul     rbx
463         add     r11,rax
464         mov     rax,QWORD[((-8))+r15*8+rcx]
465         adc     rdx,0
466         add     r11,QWORD[((-8))+r15*8+rsp]    ; + tp[j-1]
467         adc     rdx,0
468         mov     r10,rdx
469
470         mul     rbp
471         add     rdi,rax
472         mov     rax,QWORD[r15*8+rsi]
473         adc     rdx,0
474         add     rdi,r11
475         adc     rdx,0
476         mov     QWORD[((-16))+r15*8+rsp],rdi   ; tp[j-2]
477         mov     r13,rdx
478
479         mul     rbx
480         add     r10,rax
481         mov     rax,QWORD[r15*8+rcx]
482         adc     rdx,0
483         add     r10,QWORD[r15*8+rsp]           ; + tp[j]
484         adc     rdx,0
485         mov     r11,rdx
486
487         mul     rbp
488         add     r13,rax
489         mov     rax,QWORD[8+r15*8+rsi]
490         adc     rdx,0
491         add     r13,r10
492         adc     rdx,0
493         mov     QWORD[((-8))+r15*8+rsp],r13    ; tp[j-1]
494         mov     rdi,rdx
495
496         mul     rbx
497         add     r11,rax
498         mov     rax,QWORD[8+r15*8+rcx]
499         adc     rdx,0
500         add     r11,QWORD[8+r15*8+rsp]         ; + tp[j+1]
501         adc     rdx,0
502         lea     r15,[4+r15]             ; j += 4; the offsets below are relative to the new j
503         mov     r10,rdx
504
505         mul     rbp
506         add     rdi,rax
507         mov     rax,QWORD[((-16))+r15*8+rsi]
508         adc     rdx,0
509         add     rdi,r11
510         adc     rdx,0
511         mov     QWORD[((-32))+r15*8+rsp],rdi
512         mov     r13,rdx
513         cmp     r15,r9
514         jb      NEAR $L$inner4x
515
516         mul     rbx                     ; tail of the pass: the last two words
517         add     r10,rax
518         mov     rax,QWORD[((-16))+r15*8+rcx]
519         adc     rdx,0
520         add     r10,QWORD[((-16))+r15*8+rsp]
521         adc     rdx,0
522         mov     r11,rdx
523
524         mul     rbp
525         add     r13,rax
526         mov     rax,QWORD[((-8))+r15*8+rsi]
527         adc     rdx,0
528         add     r13,r10
529         adc     rdx,0
530         mov     QWORD[((-24))+r15*8+rsp],r13
531         mov     rdi,rdx
532
533         mul     rbx
534         add     r11,rax
535         mov     rax,QWORD[((-8))+r15*8+rcx]
536         adc     rdx,0
537         add     r11,QWORD[((-8))+r15*8+rsp]
538         adc     rdx,0
539         lea     r14,[1+r14]             ; i++ (done early; lea leaves flags alone)
540         mov     r10,rdx
541
542         mul     rbp
543         add     rdi,rax
544         mov     rax,QWORD[rsi]          ; reload ap[0] for the next outer pass
545         adc     rdx,0
546         add     rdi,r11
547         adc     rdx,0
548         mov     QWORD[((-16))+r15*8+rsp],rdi
549         mov     r13,rdx
550
551         xor     rdi,rdi
552         add     r13,r10
553         adc     rdi,0
554         add     r13,QWORD[r9*8+rsp]     ; + tp[num] (top carry of the previous pass)
555         adc     rdi,0
556         mov     QWORD[((-8))+r15*8+rsp],r13    ; tp[num-1]
557         mov     QWORD[r15*8+rsp],rdi           ; tp[num] = new top carry
558
559         cmp     r14,r9
560         jb      NEAR $L$outer4x
561         mov     rdi,QWORD[16+r9*8+rsp]  ; rdi = result pointer stashed at $L$mul4x_body
562         mov     rax,QWORD[rsp]          ; rax = tp[0]
563         mov     rdx,QWORD[8+rsp]        ; rdx = tp[1]
564         shr     r9,2                    ; r9 = num/4 until the shl before the epilogue
565         lea     rsi,[rsp]               ; rsi = tp
566         xor     r14,r14
567
568         sub     rax,QWORD[rcx]          ; start of the borrow chain: tp - np
569         mov     rbx,QWORD[16+rsi]
570         mov     rbp,QWORD[24+rsi]
571         sbb     rdx,QWORD[8+rcx]
572         lea     r15,[((-1))+r9]         ; num/4 - 1 loop iterations; the last group is finished after the loop
573         jmp     NEAR $L$sub4x
574 ALIGN   16
575 $L$sub4x:                               ; rp = tp - np, four words per iteration; only mov/lea/dec between sbb's so CF survives
576         mov     QWORD[r14*8+rdi],rax
577         mov     QWORD[8+r14*8+rdi],rdx
578         sbb     rbx,QWORD[16+r14*8+rcx]
579         mov     rax,QWORD[32+r14*8+rsi]
580         mov     rdx,QWORD[40+r14*8+rsi]
581         sbb     rbp,QWORD[24+r14*8+rcx]
582         mov     QWORD[16+r14*8+rdi],rbx
583         mov     QWORD[24+r14*8+rdi],rbp
584         sbb     rax,QWORD[32+r14*8+rcx]
585         mov     rbx,QWORD[48+r14*8+rsi]
586         mov     rbp,QWORD[56+r14*8+rsi]
587         sbb     rdx,QWORD[40+r14*8+rcx]
588         lea     r14,[4+r14]
589         dec     r15
590         jnz     NEAR $L$sub4x
591
592         mov     QWORD[r14*8+rdi],rax
593         mov     rax,QWORD[32+r14*8+rsi] ; rax = tp[num] (r14 = num-4 here)
594         sbb     rbx,QWORD[16+r14*8+rcx]
595         mov     QWORD[8+r14*8+rdi],rdx
596         sbb     rbp,QWORD[24+r14*8+rcx]
597         mov     QWORD[16+r14*8+rdi],rbx
598
599         sbb     rax,0                   ; rax = tp[num] - borrow, used as the select mask (expected 0 or all-ones)
600 DB 66h, 48h, 0fh, 6eh, 0c0h             ; hand-encoded movq xmm0,rax
601         punpcklqdq      xmm0,xmm0       ; replicate the mask into both 64-bit lanes
602         mov     QWORD[24+r14*8+rdi],rbp
603         xor     r14,r14                 ; r14 becomes a byte offset in $L$copy4x
604
605         mov     r15,r9                  ; num/4 iterations
606         pxor    xmm5,xmm5               ; zero, used to wipe tp
607         jmp     NEAR $L$copy4x
608 ALIGN   16
609 $L$copy4x:                              ; branch-free select between tp and rp, 32 bytes per iteration, then zero tp
610         movdqu  xmm2,XMMWORD[r14*1+rsp]
611         movdqu  xmm4,XMMWORD[16+r14*1+rsp]
612         movdqu  xmm1,XMMWORD[r14*1+rdi]
613         movdqu  xmm3,XMMWORD[16+r14*1+rdi]
614         pxor    xmm2,xmm1
615         pxor    xmm4,xmm3
616         pand    xmm2,xmm0
617         pand    xmm4,xmm0
618         pxor    xmm2,xmm1               ; mask ? tp : rp
619         pxor    xmm4,xmm3
620         movdqu  XMMWORD[r14*1+rdi],xmm2
621         movdqu  XMMWORD[16+r14*1+rdi],xmm4
622         movdqa  XMMWORD[r14*1+rsp],xmm5 ; aligned store: rsp is 1024-byte aligned, offsets are multiples of 16
623         movdqa  XMMWORD[16+r14*1+rsp],xmm5
624
625         lea     r14,[32+r14]
626         dec     r15
627         jnz     NEAR $L$copy4x
628
629         shl     r9,2                    ; r9 back to num so the saved-rsp slot can be addressed
630         mov     rsi,QWORD[8+r9*8+rsp]   ; rsi = saved rsp (points at the six pushed registers)
631         mov     rax,1                   ; return value
632         mov     r15,QWORD[rsi]
633         mov     r14,QWORD[8+rsi]
634         mov     r13,QWORD[16+rsi]
635         mov     r12,QWORD[24+rsi]
636         mov     rbp,QWORD[32+rsi]
637         mov     rbx,QWORD[40+rsi]
638         lea     rsp,[48+rsi]            ; rsp back to its value at function entry
639 $L$mul4x_epilogue:
640         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
641         mov     rsi,QWORD[16+rsp]
642         DB      0F3h,0C3h               ;repret
643 $L$SEH_end_bn_mul4x_mont:
644 EXTERN  bn_sqr8x_internal
645
646
;-----------------------------------------------------------------------
; bn_sqr8x_mont -- wrapper for the squaring path: builds a stack frame,
; copies the modulus into it, calls the external bn_sqr8x_internal,
; then wipes the scratch area. Not declared global in this file;
; bn_mul_mont jumps to $L$sqr8x_enter when both operand pointers are
; equal and num is a multiple of 8.
; ABI:  Microsoft x64 (same argument remap as bn_mul_mont).
; Out:  rax = 1
; Frame (rsp 64-byte aligned): [rsp+32] = n0 value, [rsp+40] = rsp at
;       entry, [rsp+64+16*num ...] = modulus copy, one word per
;       16-byte slot.
;-----------------------------------------------------------------------
647 ALIGN   32
648 bn_sqr8x_mont:
649         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
650         mov     QWORD[16+rsp],rsi
651         mov     rax,rsp
652 $L$SEH_begin_bn_sqr8x_mont:
653         mov     rdi,rcx
654         mov     rsi,rdx
655         mov     rdx,r8
656         mov     rcx,r9
657         mov     r8,QWORD[40+rsp]
658         mov     r9,QWORD[48+rsp]
659
660
661 $L$sqr8x_enter:
662         mov     rax,rsp                 ; rax = rsp before the pushes (stored at [rsp+40] below)
663         push    rbx
664         push    rbp
665         push    r12
666         push    r13
667         push    r14
668         push    r15
669
670         mov     r10d,r9d                ; r10 = num (zero-extended)
671         shl     r9d,3                   ; r9 = num*8 (bytes)
672         shl     r10,3+2                 ; r10 = num*32
673         neg     r9                      ; r9 = -num*8
674
675
676
677
678
679
; NOTE(review): the block below picks the frame address from the
; distance between the candidate frame and rsi, modulo 4096. The
; rationale is not stated in this file -- confirm against the perlasm
; source before changing it.
680         lea     r11,[((-64))+r9*4+rsp]  ; candidate frame base: rsp - 64 - 32*num
681         mov     r8,QWORD[r8]            ; r8 = n0 value
682         sub     r11,rsi
683         and     r11,4095                ; (candidate - ap) mod 4096
684         cmp     r10,r11
685         jb      NEAR $L$sqr8x_sp_alt
686         sub     rsp,r11
687         lea     rsp,[((-64))+r9*4+rsp]
688         jmp     NEAR $L$sqr8x_sp_done
689
690 ALIGN   32
691 $L$sqr8x_sp_alt:
692         lea     r10,[((4096-64))+r9*4]
693         lea     rsp,[((-64))+r9*4+rsp]
694         sub     r11,r10
695         mov     r10,0                   ; mov (not xor) so the CF from the sub above survives for cmovc
696         cmovc   r11,r10                 ; clamp the extra adjustment to 0 if the sub borrowed
697         sub     rsp,r11
698 $L$sqr8x_sp_done:
699         and     rsp,-64                 ; 64-byte align the frame
700         mov     r10,r9                  ; r10 = -num*8
701         neg     r9                      ; r9 = num*8
702
703         lea     r11,[64+r9*2+rsp]       ; r11 = destination of the modulus copy
704         mov     QWORD[32+rsp],r8        ; frame: n0 value
705         mov     QWORD[40+rsp],rax       ; frame: rsp at entry (epilogue and sqr_handler read this slot)
706 $L$sqr8x_body:
707
708         mov     rbp,r9
709 DB      102,73,15,110,211               ; hand-encoded movq xmm2,r11
710         shr     rbp,3+2                 ; rbp = num/4 copy iterations
711         mov     eax,DWORD[((OPENSSL_ia32cap_P+8))]  ; third dword of the capability vector; not used again in this block, presumably read by the callee
712         jmp     NEAR $L$sqr8x_copy_n
713
714 ALIGN   32
715 $L$sqr8x_copy_n:                        ; copy the modulus to the frame, four words per iteration
716         movq    xmm0,QWORD[rcx]         ; movq from memory zeroes the upper 64 bits of the register
717         movq    xmm1,QWORD[8+rcx]
718         movq    xmm3,QWORD[16+rcx]
719         movq    xmm4,QWORD[24+rcx]
720         lea     rcx,[32+rcx]
721         movdqa  XMMWORD[r11],xmm0       ; each word lands in its own 16-byte slot, upper half zero
722         movdqa  XMMWORD[16+r11],xmm1
723         movdqa  XMMWORD[32+r11],xmm3
724         movdqa  XMMWORD[48+r11],xmm4
725         lea     r11,[64+r11]
726         dec     rbp
727         jnz     NEAR $L$sqr8x_copy_n
728
729         pxor    xmm0,xmm0
730 DB      102,72,15,110,207               ; hand-encoded movq xmm1,rdi (result pointer)
731 DB      102,73,15,110,218               ; hand-encoded movq xmm3,r10 (-num*8)
732         call    bn_sqr8x_internal       ; external; presumably consumes xmm1/xmm2/xmm3 and the frame set up above -- confirm in its source
733
734         pxor    xmm0,xmm0
735         lea     rax,[48+rsp]
736         lea     rdx,[64+r9*2+rsp]       ; NOTE(review): relies on r9 still being num*8 after the call -- confirm the callee preserves it
737         shr     r9,3+2                  ; r9 = num/4 iterations
738         mov     rsi,QWORD[40+rsp]       ; rsi = rsp at entry, saved before $L$sqr8x_body
739         jmp     NEAR $L$sqr8x_zero
740
741 ALIGN   32
742 $L$sqr8x_zero:                          ; wipe two scratch regions, 64 bytes each per iteration
743         movdqa  XMMWORD[rax],xmm0
744         movdqa  XMMWORD[16+rax],xmm0
745         movdqa  XMMWORD[32+rax],xmm0
746         movdqa  XMMWORD[48+rax],xmm0
747         lea     rax,[64+rax]
748         movdqa  XMMWORD[rdx],xmm0
749         movdqa  XMMWORD[16+rdx],xmm0
750         movdqa  XMMWORD[32+rdx],xmm0
751         movdqa  XMMWORD[48+rdx],xmm0
752         lea     rdx,[64+rdx]
753         dec     r9
754         jnz     NEAR $L$sqr8x_zero
755
756         mov     rax,1                   ; return value
757         mov     r15,QWORD[((-48))+rsi]  ; pushed registers sit just below the entry rsp
758         mov     r14,QWORD[((-40))+rsi]
759         mov     r13,QWORD[((-32))+rsi]
760         mov     r12,QWORD[((-24))+rsi]
761         mov     rbp,QWORD[((-16))+rsi]
762         mov     rbx,QWORD[((-8))+rsi]
763         lea     rsp,[rsi]               ; rsp back to its value at function entry
764 $L$sqr8x_epilogue:
765         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
766         mov     rsi,QWORD[16+rsp]
767         DB      0F3h,0C3h               ;repret
768 $L$SEH_end_bn_sqr8x_mont:
; NUL-terminated ASCII banner embedded in .text; the bytes decode to
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
769 DB      77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
770 DB      112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
771 DB      54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
772 DB      32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
773 DB      115,108,46,111,114,103,62,0
774 ALIGN   16
775 EXTERN  __imp_RtlVirtualUnwind
776
;-----------------------------------------------------------------------
; mul_handler -- Win64 language-specific exception handler registered in
; .xdata for bn_mul_mont and bn_mul4x_mont.
; In:   r8 = context record, r9 = dispatcher context (rcx/rdx unused).
; The numeric offsets are assumed to follow the x64 CONTEXT and
; DISPATCHER_CONTEXT layouts -- verify against winnt.h:
;       context: 120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 192 R9,
;                216..240 R12..R15, 248 Rip
;       dispatcher: 8 ImageBase, 56 HandlerData
; HandlerData holds two RVAs from .xdata: the body label and the
; epilogue label of the faulting function.
; Falls into $L$common_seh_tail (inside sqr_handler) with rax = the
; function's rsp at entry.
; NOTE(review): the body case locates the saved-rsp slot via
; context->R9 (num). bn_mul4x_mont holds num/4 in r9 between its
; shr r9,2 and shl r9,2 -- confirm whether a fault in that window
; matters.
;-----------------------------------------------------------------------
777 ALIGN   16
778 mul_handler:
779         push    rsi
780         push    rdi
781         push    rbx
782         push    rbp
783         push    r12
784         push    r13
785         push    r14
786         push    r15
787         pushfq
788         sub     rsp,64                  ; 32 bytes shadow space + 4 stack args for RtlVirtualUnwind
789
790         mov     rax,QWORD[120+r8]       ; context->Rax (the prologue leaves rsp-at-entry in rax)
791         mov     rbx,QWORD[248+r8]       ; context->Rip
792
793         mov     rsi,QWORD[8+r9]         ; disp->ImageBase
794         mov     r11,QWORD[56+r9]        ; disp->HandlerData
795
796         mov     r10d,DWORD[r11]         ; HandlerData[0] = RVA of the body label
797         lea     r10,[r10*1+rsi]
798         cmp     rbx,r10
799         jb      NEAR $L$common_seh_tail ; Rip before the body: still in the prologue, rax is rsp-at-entry
800
801         mov     rax,QWORD[152+r8]       ; context->Rsp
802
803         mov     r10d,DWORD[4+r11]       ; HandlerData[1] = RVA of the epilogue label
804         lea     r10,[r10*1+rsi]
805         cmp     rbx,r10
806         jae     NEAR $L$common_seh_tail ; Rip at or past the epilogue label: rsp is already restored
807
808         mov     r10,QWORD[192+r8]       ; context->R9 = num
809         mov     rax,QWORD[8+r10*8+rax]  ; saved rsp slot written before the body label
810         lea     rax,[48+rax]            ; skip the six pushed registers -> rsp at entry
811
812         mov     rbx,QWORD[((-8))+rax]   ; reload the pushed callee-saved registers
813         mov     rbp,QWORD[((-16))+rax]
814         mov     r12,QWORD[((-24))+rax]
815         mov     r13,QWORD[((-32))+rax]
816         mov     r14,QWORD[((-40))+rax]
817         mov     r15,QWORD[((-48))+rax]
818         mov     QWORD[144+r8],rbx       ; write them back into the context record
819         mov     QWORD[160+r8],rbp
820         mov     QWORD[216+r8],r12
821         mov     QWORD[224+r8],r13
822         mov     QWORD[232+r8],r14
823         mov     QWORD[240+r8],r15
824
825         jmp     NEAR $L$common_seh_tail
826
827
828
;-----------------------------------------------------------------------
; sqr_handler -- Win64 language-specific exception handler registered in
; .xdata for bn_sqr8x_mont, followed by $L$common_seh_tail, which
; mul_handler also jumps into.
; In:   r8 = context record, r9 = dispatcher context (rcx/rdx unused).
; Out:  eax = 1 (presumably ExceptionContinueSearch -- verify against
;       the Windows headers).
; The numeric offsets are assumed to follow the x64 CONTEXT and
; DISPATCHER_CONTEXT layouts -- verify against winnt.h.
; The only difference from mul_handler is the body case: bn_sqr8x_mont
; stores its rsp-at-entry at [rsp+40] of its frame.
;-----------------------------------------------------------------------
829 ALIGN   16
830 sqr_handler:
831         push    rsi
832         push    rdi
833         push    rbx
834         push    rbp
835         push    r12
836         push    r13
837         push    r14
838         push    r15
839         pushfq
840         sub     rsp,64                  ; 32 bytes shadow space + 4 stack args for RtlVirtualUnwind
841
842         mov     rax,QWORD[120+r8]       ; context->Rax (the prologue leaves rsp-at-entry in rax)
843         mov     rbx,QWORD[248+r8]       ; context->Rip
844
845         mov     rsi,QWORD[8+r9]         ; disp->ImageBase
846         mov     r11,QWORD[56+r9]        ; disp->HandlerData
847
848         mov     r10d,DWORD[r11]         ; HandlerData[0] = RVA of the body label
849         lea     r10,[r10*1+rsi]
850         cmp     rbx,r10
851         jb      NEAR $L$common_seh_tail ; Rip before the body: still in the prologue
852
853         mov     rax,QWORD[152+r8]       ; context->Rsp
854
855         mov     r10d,DWORD[4+r11]       ; HandlerData[1] = RVA of the epilogue label
856         lea     r10,[r10*1+rsi]
857         cmp     rbx,r10
858         jae     NEAR $L$common_seh_tail ; Rip at or past the epilogue label: rsp is already restored
859
860         mov     rax,QWORD[40+rax]       ; rsp-at-entry saved in the sqr8x frame
861
862         mov     rbx,QWORD[((-8))+rax]   ; reload the pushed callee-saved registers
863         mov     rbp,QWORD[((-16))+rax]
864         mov     r12,QWORD[((-24))+rax]
865         mov     r13,QWORD[((-32))+rax]
866         mov     r14,QWORD[((-40))+rax]
867         mov     r15,QWORD[((-48))+rax]
868         mov     QWORD[144+r8],rbx       ; write them back into the context record
869         mov     QWORD[160+r8],rbp
870         mov     QWORD[216+r8],r12
871         mov     QWORD[224+r8],r13
872         mov     QWORD[232+r8],r14
873         mov     QWORD[240+r8],r15
874
875 $L$common_seh_tail:                     ; shared tail; rax = the faulting function's rsp at entry
876         mov     rdi,QWORD[8+rax]        ; rdi/rsi copies stored in the home space by the WIN64 prologue
877         mov     rsi,QWORD[16+rax]
878         mov     QWORD[152+r8],rax       ; context->Rsp
879         mov     QWORD[168+r8],rsi       ; context->Rsi
880         mov     QWORD[176+r8],rdi       ; context->Rdi
881
882         mov     rdi,QWORD[40+r9]        ; destination: disp->ContextRecord
883         mov     rsi,r8                  ; source: the patched context
884         mov     ecx,154                 ; 154 qwords = 1232 bytes (presumably sizeof(CONTEXT))
885         DD      0xa548f3fc              ; bytes FC F3 48 A5 = cld; rep movsq
886
887         mov     rsi,r9                  ; rsi = dispatcher context
888         xor     rcx,rcx                 ; arg1 = 0 (handler type)
889         mov     rdx,QWORD[8+rsi]        ; arg2 = disp->ImageBase
890         mov     r8,QWORD[rsi]           ; arg3 = disp->ControlPc
891         mov     r9,QWORD[16+rsi]        ; arg4 = disp->FunctionEntry
892         mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
893         lea     r11,[56+rsi]            ; &disp->HandlerData
894         lea     r12,[24+rsi]            ; &disp->EstablisherFrame
895         mov     QWORD[32+rsp],r10       ; arg5
896         mov     QWORD[40+rsp],r11       ; arg6
897         mov     QWORD[48+rsp],r12       ; arg7
898         mov     QWORD[56+rsp],rcx       ; arg8 = 0
899         call    QWORD[__imp_RtlVirtualUnwind]
900
901         mov     eax,1                   ; handler return value
902         add     rsp,64
903         popfq
904         pop     r15
905         pop     r14
906         pop     r13
907         pop     r12
908         pop     rbp
909         pop     rbx
910         pop     rdi
911         pop     rsi
912         DB      0F3h,0C3h               ;repret
913
914
; Function table: one entry per function, three image-relative
; addresses each -- start label, end label, and the matching unwind-info
; record in .xdata.
915 section .pdata rdata align=4
916 ALIGN   4
917         DD      $L$SEH_begin_bn_mul_mont wrt ..imagebase
918         DD      $L$SEH_end_bn_mul_mont wrt ..imagebase
919         DD      $L$SEH_info_bn_mul_mont wrt ..imagebase
920
921         DD      $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
922         DD      $L$SEH_end_bn_mul4x_mont wrt ..imagebase
923         DD      $L$SEH_info_bn_mul4x_mont wrt ..imagebase
924
925         DD      $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
926         DD      $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
927         DD      $L$SEH_info_bn_sqr8x_mont wrt ..imagebase
; Unwind-info records referenced from .pdata. Each record is a 4-byte
; header (9,0,0,0), the handler's image-relative address, and two
; image-relative addresses that the handler reads as HandlerData[0]
; (body label) and HandlerData[1] (epilogue label).
; Header byte 9 presumably encodes version 1 with the exception-handler
; flag set and no unwind codes -- verify against the UNWIND_INFO
; documentation.
928 section .xdata rdata align=8
929 ALIGN   8
930 $L$SEH_info_bn_mul_mont:
931 DB      9,0,0,0
932         DD      mul_handler wrt ..imagebase
933         DD      $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
934 $L$SEH_info_bn_mul4x_mont:
935 DB      9,0,0,0
936         DD      mul_handler wrt ..imagebase
937         DD      $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
938 $L$SEH_info_bn_sqr8x_mont:
939 DB      9,0,0,0
940         DD      sqr_handler wrt ..imagebase
941         DD      $L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase