Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / third_party / boringssl / win-x86_64 / crypto / modes / ghash-x86_64.asm
1 default rel                   ; address static data RIP-relative unless stated otherwise
; The three size keywords below are defined as empty macros, so operands
; written as XMMWORD[...] in this file reduce to a plain [...] memory reference.
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64   ; executable code section, 64-byte aligned
6
7 EXTERN  OPENSSL_ia32cap_P     ; capability words defined elsewhere; read by gcm_ghash_clmul
8
;-----------------------------------------------------------------------
; gcm_gmult_4bit
; ABI:   Microsoft x64 (arguments arrive in rcx, rdx)
; In:    rcx = Xi     - 16-byte block; read, then overwritten with the result
;        rdx = Htable - table of 16-byte entries, indexed by a 4-bit value
; Out:   result stored to [Xi] and [Xi+8] (each qword byte-swapped first)
; Uses:  $L$rem_4bit (defined elsewhere in this file) as a table of qwords
; Regs:  rdi = Xi, rsi = Htable, r11 = &rem_4bit,
;        r9:r8 = 128-bit accumulator (r9 is the upper qword),
;        rax = (low nibble of current byte)*16, rbx = (high nibble)*16,
;        rcx = index of the next Xi byte to fetch (counts down from 14),
;        rdx = the 4 bits about to be shifted out of r8 (index into rem_4bit)
; Each step shifts r9:r8 right by 4 bits, XORs in the Htable entry chosen
; by the next nibble and XORs rem_4bit[shifted-out bits] into r9.
;-----------------------------------------------------------------------
9 global  gcm_gmult_4bit
10
11 ALIGN   16
12 gcm_gmult_4bit:
13         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
14         mov     QWORD[16+rsp],rsi
15         mov     rax,rsp
16 $L$SEH_begin_gcm_gmult_4bit:
17         mov     rdi,rcx                 ;rdi = Xi (1st argument)
18         mov     rsi,rdx                 ;rsi = Htable (2nd argument)
19
20
21         push    rbx
22         push    rbp
23         push    r12
24 $L$gmult_prologue:
25
26         movzx   r8,BYTE[15+rdi]         ;start with the last byte of Xi
27         lea     r11,[$L$rem_4bit]
28         xor     rax,rax
29         xor     rbx,rbx
30         mov     al,r8b
31         mov     bl,r8b
32         shl     al,4                    ;rax = (low nibble)*16 = byte offset into Htable
33         mov     rcx,14                  ;next byte of Xi to fetch
34         mov     r8,QWORD[8+rax*1+rsi]   ;accumulator = Htable entry for the low nibble
35         mov     r9,QWORD[rax*1+rsi]
36         and     bl,0xf0                 ;rbx = (high nibble)*16
37         mov     rdx,r8
38         jmp     NEAR $L$oop1
39
40 ALIGN   16
41 $L$oop1:
42         shr     r8,4                    ;--- step for the high nibble (rbx) ---
43         and     rdx,0xf                 ;bits shifted out of r8
44         mov     r10,r9
45         mov     al,BYTE[rcx*1+rdi]      ;fetch the next byte of Xi
46         shr     r9,4
47         xor     r8,QWORD[8+rbx*1+rsi]
48         shl     r10,60                  ;low 4 bits of r9 move to the top of r8
49         xor     r9,QWORD[rbx*1+rsi]
50         mov     bl,al
51         xor     r9,QWORD[rdx*8+r11]     ;fold shifted-out bits back in via rem_4bit
52         mov     rdx,r8
53         shl     al,4                    ;rax = (low nibble of new byte)*16
54         xor     r8,r10
55         dec     rcx
56         js      NEAR $L$break1          ;byte 0 has been fetched: finish outside the loop
57
58         shr     r8,4                    ;--- step for the low nibble (rax) ---
59         and     rdx,0xf
60         mov     r10,r9
61         shr     r9,4
62         xor     r8,QWORD[8+rax*1+rsi]
63         shl     r10,60
64         xor     r9,QWORD[rax*1+rsi]
65         and     bl,0xf0                 ;rbx = (high nibble of new byte)*16
66         xor     r9,QWORD[rdx*8+r11]
67         mov     rdx,r8
68         xor     r8,r10
69         jmp     NEAR $L$oop1
70
71 ALIGN   16
72 $L$break1:
73         shr     r8,4                    ;final byte: low-nibble step
74         and     rdx,0xf
75         mov     r10,r9
76         shr     r9,4
77         xor     r8,QWORD[8+rax*1+rsi]
78         shl     r10,60
79         xor     r9,QWORD[rax*1+rsi]
80         and     bl,0xf0
81         xor     r9,QWORD[rdx*8+r11]
82         mov     rdx,r8
83         xor     r8,r10
84
85         shr     r8,4                    ;final byte: high-nibble step
86         and     rdx,0xf
87         mov     r10,r9
88         shr     r9,4
89         xor     r8,QWORD[8+rbx*1+rsi]
90         shl     r10,60
91         xor     r9,QWORD[rbx*1+rsi]
92         xor     r8,r10
93         xor     r9,QWORD[rdx*8+r11]
94
95         bswap   r8                      ;byte-swap each half before storing
96         bswap   r9
97         mov     QWORD[8+rdi],r8
98         mov     QWORD[rdi],r9
99
100         mov     rbx,QWORD[16+rsp]       ;only rbx is reloaded: rbp and r12 are never modified above
101         lea     rsp,[24+rsp]            ;drop the three pushed registers
102 $L$gmult_epilogue:
103         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
104         mov     rsi,QWORD[16+rsp]
105         DB      0F3h,0C3h               ;repret
106 $L$SEH_end_gcm_gmult_4bit:
;-----------------------------------------------------------------------
; gcm_ghash_4bit
; ABI:   Microsoft x64 (arguments arrive in rcx, rdx, r8, r9)
; In:    rcx = Xi     - 16-byte block; read, updated per input block, and
;                       overwritten with the final result
;        rdx = Htable - 16 entries of 16 bytes each (offsets 0..255 are read)
;        r8  = inp    - input data, consumed 16 bytes per outer iteration
;        r9  = len    - byte count; the loop runs until inp reaches inp+len
;                       (unsigned compare, at least one block is processed)
; Uses:  $L$rem_8bit (defined elsewhere in this file) as a table of words
; Regs:  rdi = Xi, rsi = Htable, r14 = current input pointer,
;        r15 = end of input, r11 = &rem_8bit, r9:r8 = 128-bit accumulator
; Stack: 280 bytes of locals, built once from Htable before the loop.
;        Entry i of Htable is treated as a 128-bit value whose upper qword
;        is at offset 0 and lower qword at offset 8:
;          [rsp+i]          (16 bytes)  (low 4 bits of entry i) << 4
;          [rbp-128+8*i]    (16 qwords) lower qword of (entry i >> 4)
;          [rbp+8*i]        (16 qwords) upper qword of (entry i >> 4)
;        with rbp = rsp+16+128.
; Change from the generated original: the final "mov edx,DWORD[-4+rdi]"
; was dropped. Its result was never consumed (rdx is reloaded at the top of
; $L$outer_loop and unused after the loop) and it read 4 bytes below Xi.
;-----------------------------------------------------------------------
107 global  gcm_ghash_4bit
108
109 ALIGN   16
110 gcm_ghash_4bit:
111         mov     QWORD[8+rsp],rdi        ;WIN64 prologue
112         mov     QWORD[16+rsp],rsi
113         mov     rax,rsp
114 $L$SEH_begin_gcm_ghash_4bit:
115         mov     rdi,rcx                 ;rdi = Xi
116         mov     rsi,rdx                 ;rsi = Htable
117         mov     rdx,r8                  ;rdx = inp
118         mov     rcx,r9                  ;rcx = len
119
120
121         push    rbx
122         push    rbp
123         push    r12
124         push    r13
125         push    r14
126         push    r15
127         sub     rsp,280
128 $L$ghash_prologue:
129         mov     r14,rdx                 ;r14 = inp
130         mov     r15,rcx                 ;r15 = len (turned into an end pointer below)
131         sub     rsi,-128                ;bias Htable pointer by +128 (undone after the table build)
132         lea     rbp,[((16+128))+rsp]
133         xor     edx,edx
134         mov     r8,QWORD[((0+0-128))+rsi]       ;--- build the shifted tables, entry 0 ---
135         mov     rax,QWORD[((0+8-128))+rsi]
136         mov     dl,al
137         shr     rax,4
138         mov     r10,r8
139         shr     r8,4
140         mov     r9,QWORD[((16+0-128))+rsi]
141         shl     dl,4
142         mov     rbx,QWORD[((16+8-128))+rsi]
143         shl     r10,60
144         mov     BYTE[rsp],dl            ;(low 4 bits of entry 0) << 4
145         or      rax,r10
146         mov     dl,bl
147         shr     rbx,4
148         mov     r10,r9
149         shr     r9,4
150         mov     QWORD[rbp],r8           ;upper qword of entry 0 >> 4
151         mov     r8,QWORD[((32+0-128))+rsi]
152         shl     dl,4
153         mov     QWORD[((0-128))+rbp],rax        ;lower qword of entry 0 >> 4
154         mov     rax,QWORD[((32+8-128))+rsi]
155         shl     r10,60
156         mov     BYTE[1+rsp],dl
157         or      rbx,r10
158         mov     dl,al
159         shr     rax,4
160         mov     r10,r8
161         shr     r8,4
162         mov     QWORD[8+rbp],r9
163         mov     r9,QWORD[((48+0-128))+rsi]
164         shl     dl,4
165         mov     QWORD[((8-128))+rbp],rbx
166         mov     rbx,QWORD[((48+8-128))+rsi]
167         shl     r10,60
168         mov     BYTE[2+rsp],dl
169         or      rax,r10
170         mov     dl,bl
171         shr     rbx,4
172         mov     r10,r9
173         shr     r9,4
174         mov     QWORD[16+rbp],r8
175         mov     r8,QWORD[((64+0-128))+rsi]
176         shl     dl,4
177         mov     QWORD[((16-128))+rbp],rax
178         mov     rax,QWORD[((64+8-128))+rsi]
179         shl     r10,60
180         mov     BYTE[3+rsp],dl
181         or      rbx,r10
182         mov     dl,al
183         shr     rax,4
184         mov     r10,r8
185         shr     r8,4
186         mov     QWORD[24+rbp],r9
187         mov     r9,QWORD[((80+0-128))+rsi]
188         shl     dl,4
189         mov     QWORD[((24-128))+rbp],rbx
190         mov     rbx,QWORD[((80+8-128))+rsi]
191         shl     r10,60
192         mov     BYTE[4+rsp],dl
193         or      rax,r10
194         mov     dl,bl
195         shr     rbx,4
196         mov     r10,r9
197         shr     r9,4
198         mov     QWORD[32+rbp],r8
199         mov     r8,QWORD[((96+0-128))+rsi]
200         shl     dl,4
201         mov     QWORD[((32-128))+rbp],rax
202         mov     rax,QWORD[((96+8-128))+rsi]
203         shl     r10,60
204         mov     BYTE[5+rsp],dl
205         or      rbx,r10
206         mov     dl,al
207         shr     rax,4
208         mov     r10,r8
209         shr     r8,4
210         mov     QWORD[40+rbp],r9
211         mov     r9,QWORD[((112+0-128))+rsi]
212         shl     dl,4
213         mov     QWORD[((40-128))+rbp],rbx
214         mov     rbx,QWORD[((112+8-128))+rsi]
215         shl     r10,60
216         mov     BYTE[6+rsp],dl
217         or      rax,r10
218         mov     dl,bl
219         shr     rbx,4
220         mov     r10,r9
221         shr     r9,4
222         mov     QWORD[48+rbp],r8
223         mov     r8,QWORD[((128+0-128))+rsi]
224         shl     dl,4
225         mov     QWORD[((48-128))+rbp],rax
226         mov     rax,QWORD[((128+8-128))+rsi]
227         shl     r10,60
228         mov     BYTE[7+rsp],dl
229         or      rbx,r10
230         mov     dl,al
231         shr     rax,4
232         mov     r10,r8
233         shr     r8,4
234         mov     QWORD[56+rbp],r9
235         mov     r9,QWORD[((144+0-128))+rsi]
236         shl     dl,4
237         mov     QWORD[((56-128))+rbp],rbx
238         mov     rbx,QWORD[((144+8-128))+rsi]
239         shl     r10,60
240         mov     BYTE[8+rsp],dl
241         or      rax,r10
242         mov     dl,bl
243         shr     rbx,4
244         mov     r10,r9
245         shr     r9,4
246         mov     QWORD[64+rbp],r8
247         mov     r8,QWORD[((160+0-128))+rsi]
248         shl     dl,4
249         mov     QWORD[((64-128))+rbp],rax
250         mov     rax,QWORD[((160+8-128))+rsi]
251         shl     r10,60
252         mov     BYTE[9+rsp],dl
253         or      rbx,r10
254         mov     dl,al
255         shr     rax,4
256         mov     r10,r8
257         shr     r8,4
258         mov     QWORD[72+rbp],r9
259         mov     r9,QWORD[((176+0-128))+rsi]
260         shl     dl,4
261         mov     QWORD[((72-128))+rbp],rbx
262         mov     rbx,QWORD[((176+8-128))+rsi]
263         shl     r10,60
264         mov     BYTE[10+rsp],dl
265         or      rax,r10
266         mov     dl,bl
267         shr     rbx,4
268         mov     r10,r9
269         shr     r9,4
270         mov     QWORD[80+rbp],r8
271         mov     r8,QWORD[((192+0-128))+rsi]
272         shl     dl,4
273         mov     QWORD[((80-128))+rbp],rax
274         mov     rax,QWORD[((192+8-128))+rsi]
275         shl     r10,60
276         mov     BYTE[11+rsp],dl
277         or      rbx,r10
278         mov     dl,al
279         shr     rax,4
280         mov     r10,r8
281         shr     r8,4
282         mov     QWORD[88+rbp],r9
283         mov     r9,QWORD[((208+0-128))+rsi]
284         shl     dl,4
285         mov     QWORD[((88-128))+rbp],rbx
286         mov     rbx,QWORD[((208+8-128))+rsi]
287         shl     r10,60
288         mov     BYTE[12+rsp],dl
289         or      rax,r10
290         mov     dl,bl
291         shr     rbx,4
292         mov     r10,r9
293         shr     r9,4
294         mov     QWORD[96+rbp],r8
295         mov     r8,QWORD[((224+0-128))+rsi]
296         shl     dl,4
297         mov     QWORD[((96-128))+rbp],rax
298         mov     rax,QWORD[((224+8-128))+rsi]
299         shl     r10,60
300         mov     BYTE[13+rsp],dl
301         or      rbx,r10
302         mov     dl,al
303         shr     rax,4
304         mov     r10,r8
305         shr     r8,4
306         mov     QWORD[104+rbp],r9
307         mov     r9,QWORD[((240+0-128))+rsi]
308         shl     dl,4
309         mov     QWORD[((104-128))+rbp],rbx
310         mov     rbx,QWORD[((240+8-128))+rsi]
311         shl     r10,60
312         mov     BYTE[14+rsp],dl
313         or      rax,r10
314         mov     dl,bl
315         shr     rbx,4
316         mov     r10,r9
317         shr     r9,4
318         mov     QWORD[112+rbp],r8
319         shl     dl,4
320         mov     QWORD[((112-128))+rbp],rax
321         shl     r10,60
322         mov     BYTE[15+rsp],dl
323         or      rbx,r10
324         mov     QWORD[120+rbp],r9
325         mov     QWORD[((120-128))+rbp],rbx      ;--- table build complete ---
326         add     rsi,-128                ;undo the +128 bias: rsi = Htable again
327         mov     r8,QWORD[8+rdi]         ;load current Xi
328         mov     r9,QWORD[rdi]
329         add     r15,r14                 ;r15 = inp + len = end of input
330         lea     r11,[$L$rem_8bit]
331         jmp     NEAR $L$outer_loop
332 ALIGN   16
333 $L$outer_loop:
334         xor     r9,QWORD[r14]           ;Xi ^= next 16 input bytes
335         mov     rdx,QWORD[8+r14]
336         lea     r14,[16+r14]            ;advance input pointer by one block
337         xor     rdx,r8
338         mov     QWORD[rdi],r9           ;park Xi^inp in memory; re-read one dword at a time below
339         mov     QWORD[8+rdi],rdx
340         shr     rdx,32                  ;edx = dword at offset 12 of the block
341         xor     rax,rax
342         rol     edx,8                   ;bring the next byte into dl
343         mov     al,dl
344         movzx   ebx,dl
345         shl     al,4                    ;rax = (low nibble)*16: offset into Htable
346         shr     ebx,4                   ;rbx = high nibble: index into the stack tables
347         rol     edx,8
348         mov     r8,QWORD[8+rax*1+rsi]   ;accumulator = Htable entry for the low nibble
349         mov     r9,QWORD[rax*1+rsi]
350         mov     al,dl
351         movzx   ecx,dl
352         shl     al,4
353         movzx   r12,BYTE[rbx*1+rsp]     ;byte from the stack byte-table for this high nibble
354         shr     ecx,4
355         xor     r12,r8
356         mov     r10,r9
357         shr     r8,8                    ;shift accumulator right by 8 bits
358         movzx   r12,r12b                ;r12 = index into rem_8bit
359         shr     r9,8
360         xor     r8,QWORD[((-128))+rbx*8+rbp]    ;XOR in the pre-shifted entry for the high nibble
361         shl     r10,56
362         xor     r9,QWORD[rbx*8+rbp]
363         rol     edx,8
364         xor     r8,QWORD[8+rax*1+rsi]   ;XOR in the Htable entry for the next low nibble
365         xor     r9,QWORD[rax*1+rsi]
366         mov     al,dl
367         xor     r8,r10
368         movzx   r12,WORD[r12*2+r11]     ;16-bit rem_8bit entry ...
369         movzx   ebx,dl
370         shl     al,4
371         movzx   r13,BYTE[rcx*1+rsp]
372         shr     ebx,4
373         shl     r12,48                  ;... moved to the top 16 bits before the XOR into r9
374         xor     r13,r8
375         mov     r10,r9
376         xor     r9,r12
377         shr     r8,8
378         movzx   r13,r13b
379         shr     r9,8
380         xor     r8,QWORD[((-128))+rcx*8+rbp]
381         shl     r10,56
382         xor     r9,QWORD[rcx*8+rbp]
383         rol     edx,8
384         xor     r8,QWORD[8+rax*1+rsi]
385         xor     r9,QWORD[rax*1+rsi]
386         mov     al,dl
387         xor     r8,r10
388         movzx   r13,WORD[r13*2+r11]
389         movzx   ecx,dl
390         shl     al,4
391         movzx   r12,BYTE[rbx*1+rsp]
392         shr     ecx,4
393         shl     r13,48
394         xor     r12,r8
395         mov     r10,r9
396         xor     r9,r13
397         shr     r8,8
398         movzx   r12,r12b
399         mov     edx,DWORD[8+rdi]        ;fetch the dword at offset 8 of the block
400         shr     r9,8
401         xor     r8,QWORD[((-128))+rbx*8+rbp]
402         shl     r10,56
403         xor     r9,QWORD[rbx*8+rbp]
404         rol     edx,8
405         xor     r8,QWORD[8+rax*1+rsi]
406         xor     r9,QWORD[rax*1+rsi]
407         mov     al,dl
408         xor     r8,r10
409         movzx   r12,WORD[r12*2+r11]
410         movzx   ebx,dl
411         shl     al,4
412         movzx   r13,BYTE[rcx*1+rsp]
413         shr     ebx,4
414         shl     r12,48
415         xor     r13,r8
416         mov     r10,r9
417         xor     r9,r12
418         shr     r8,8
419         movzx   r13,r13b
420         shr     r9,8
421         xor     r8,QWORD[((-128))+rcx*8+rbp]
422         shl     r10,56
423         xor     r9,QWORD[rcx*8+rbp]
424         rol     edx,8
425         xor     r8,QWORD[8+rax*1+rsi]
426         xor     r9,QWORD[rax*1+rsi]
427         mov     al,dl
428         xor     r8,r10
429         movzx   r13,WORD[r13*2+r11]
430         movzx   ecx,dl
431         shl     al,4
432         movzx   r12,BYTE[rbx*1+rsp]
433         shr     ecx,4
434         shl     r13,48
435         xor     r12,r8
436         mov     r10,r9
437         xor     r9,r13
438         shr     r8,8
439         movzx   r12,r12b
440         shr     r9,8
441         xor     r8,QWORD[((-128))+rbx*8+rbp]
442         shl     r10,56
443         xor     r9,QWORD[rbx*8+rbp]
444         rol     edx,8
445         xor     r8,QWORD[8+rax*1+rsi]
446         xor     r9,QWORD[rax*1+rsi]
447         mov     al,dl
448         xor     r8,r10
449         movzx   r12,WORD[r12*2+r11]
450         movzx   ebx,dl
451         shl     al,4
452         movzx   r13,BYTE[rcx*1+rsp]
453         shr     ebx,4
454         shl     r12,48
455         xor     r13,r8
456         mov     r10,r9
457         xor     r9,r12
458         shr     r8,8
459         movzx   r13,r13b
460         shr     r9,8
461         xor     r8,QWORD[((-128))+rcx*8+rbp]
462         shl     r10,56
463         xor     r9,QWORD[rcx*8+rbp]
464         rol     edx,8
465         xor     r8,QWORD[8+rax*1+rsi]
466         xor     r9,QWORD[rax*1+rsi]
467         mov     al,dl
468         xor     r8,r10
469         movzx   r13,WORD[r13*2+r11]
470         movzx   ecx,dl
471         shl     al,4
472         movzx   r12,BYTE[rbx*1+rsp]
473         shr     ecx,4
474         shl     r13,48
475         xor     r12,r8
476         mov     r10,r9
477         xor     r9,r13
478         shr     r8,8
479         movzx   r12,r12b
480         mov     edx,DWORD[4+rdi]        ;fetch the dword at offset 4 of the block
481         shr     r9,8
482         xor     r8,QWORD[((-128))+rbx*8+rbp]
483         shl     r10,56
484         xor     r9,QWORD[rbx*8+rbp]
485         rol     edx,8
486         xor     r8,QWORD[8+rax*1+rsi]
487         xor     r9,QWORD[rax*1+rsi]
488         mov     al,dl
489         xor     r8,r10
490         movzx   r12,WORD[r12*2+r11]
491         movzx   ebx,dl
492         shl     al,4
493         movzx   r13,BYTE[rcx*1+rsp]
494         shr     ebx,4
495         shl     r12,48
496         xor     r13,r8
497         mov     r10,r9
498         xor     r9,r12
499         shr     r8,8
500         movzx   r13,r13b
501         shr     r9,8
502         xor     r8,QWORD[((-128))+rcx*8+rbp]
503         shl     r10,56
504         xor     r9,QWORD[rcx*8+rbp]
505         rol     edx,8
506         xor     r8,QWORD[8+rax*1+rsi]
507         xor     r9,QWORD[rax*1+rsi]
508         mov     al,dl
509         xor     r8,r10
510         movzx   r13,WORD[r13*2+r11]
511         movzx   ecx,dl
512         shl     al,4
513         movzx   r12,BYTE[rbx*1+rsp]
514         shr     ecx,4
515         shl     r13,48
516         xor     r12,r8
517         mov     r10,r9
518         xor     r9,r13
519         shr     r8,8
520         movzx   r12,r12b
521         shr     r9,8
522         xor     r8,QWORD[((-128))+rbx*8+rbp]
523         shl     r10,56
524         xor     r9,QWORD[rbx*8+rbp]
525         rol     edx,8
526         xor     r8,QWORD[8+rax*1+rsi]
527         xor     r9,QWORD[rax*1+rsi]
528         mov     al,dl
529         xor     r8,r10
530         movzx   r12,WORD[r12*2+r11]
531         movzx   ebx,dl
532         shl     al,4
533         movzx   r13,BYTE[rcx*1+rsp]
534         shr     ebx,4
535         shl     r12,48
536         xor     r13,r8
537         mov     r10,r9
538         xor     r9,r12
539         shr     r8,8
540         movzx   r13,r13b
541         shr     r9,8
542         xor     r8,QWORD[((-128))+rcx*8+rbp]
543         shl     r10,56
544         xor     r9,QWORD[rcx*8+rbp]
545         rol     edx,8
546         xor     r8,QWORD[8+rax*1+rsi]
547         xor     r9,QWORD[rax*1+rsi]
548         mov     al,dl
549         xor     r8,r10
550         movzx   r13,WORD[r13*2+r11]
551         movzx   ecx,dl
552         shl     al,4
553         movzx   r12,BYTE[rbx*1+rsp]
554         shr     ecx,4
555         shl     r13,48
556         xor     r12,r8
557         mov     r10,r9
558         xor     r9,r13
559         shr     r8,8
560         movzx   r12,r12b
561         mov     edx,DWORD[rdi]          ;fetch the dword at offset 0 of the block
562         shr     r9,8
563         xor     r8,QWORD[((-128))+rbx*8+rbp]
564         shl     r10,56
565         xor     r9,QWORD[rbx*8+rbp]
566         rol     edx,8
567         xor     r8,QWORD[8+rax*1+rsi]
568         xor     r9,QWORD[rax*1+rsi]
569         mov     al,dl
570         xor     r8,r10
571         movzx   r12,WORD[r12*2+r11]
572         movzx   ebx,dl
573         shl     al,4
574         movzx   r13,BYTE[rcx*1+rsp]
575         shr     ebx,4
576         shl     r12,48
577         xor     r13,r8
578         mov     r10,r9
579         xor     r9,r12
580         shr     r8,8
581         movzx   r13,r13b
582         shr     r9,8
583         xor     r8,QWORD[((-128))+rcx*8+rbp]
584         shl     r10,56
585         xor     r9,QWORD[rcx*8+rbp]
586         rol     edx,8
587         xor     r8,QWORD[8+rax*1+rsi]
588         xor     r9,QWORD[rax*1+rsi]
589         mov     al,dl
590         xor     r8,r10
591         movzx   r13,WORD[r13*2+r11]
592         movzx   ecx,dl
593         shl     al,4
594         movzx   r12,BYTE[rbx*1+rsp]
595         shr     ecx,4
596         shl     r13,48
597         xor     r12,r8
598         mov     r10,r9
599         xor     r9,r13
600         shr     r8,8
601         movzx   r12,r12b
602         shr     r9,8
603         xor     r8,QWORD[((-128))+rbx*8+rbp]
604         shl     r10,56
605         xor     r9,QWORD[rbx*8+rbp]
606         rol     edx,8
607         xor     r8,QWORD[8+rax*1+rsi]
608         xor     r9,QWORD[rax*1+rsi]
609         mov     al,dl
610         xor     r8,r10
611         movzx   r12,WORD[r12*2+r11]
612         movzx   ebx,dl
613         shl     al,4
614         movzx   r13,BYTE[rcx*1+rsp]
615         shr     ebx,4
616         shl     r12,48
617         xor     r13,r8
618         mov     r10,r9
619         xor     r9,r12
620         shr     r8,8
621         movzx   r13,r13b
622         shr     r9,8
623         xor     r8,QWORD[((-128))+rcx*8+rbp]
624         shl     r10,56
625         xor     r9,QWORD[rcx*8+rbp]
626         rol     edx,8
627         xor     r8,QWORD[8+rax*1+rsi]
628         xor     r9,QWORD[rax*1+rsi]
629         mov     al,dl
630         xor     r8,r10
631         movzx   r13,WORD[r13*2+r11]
632         movzx   ecx,dl
633         shl     al,4
634         movzx   r12,BYTE[rbx*1+rsp]
635         and     ecx,240                 ;last byte: keep (high nibble)*16 as a direct Htable offset
636         shl     r13,48
637         xor     r12,r8
638         mov     r10,r9
639         xor     r9,r13
640         shr     r8,8
641         movzx   r12,r12b
642         ;(dead load "mov edx,DWORD[-4+rdi]" removed: result unused, address lies below Xi)
643         shr     r9,8
644         xor     r8,QWORD[((-128))+rbx*8+rbp]
645         shl     r10,56
646         xor     r9,QWORD[rbx*8+rbp]
647         movzx   r12,WORD[r12*2+r11]
648         xor     r8,QWORD[8+rax*1+rsi]
649         xor     r9,QWORD[rax*1+rsi]
650         shl     r12,48
651         xor     r8,r10
652         xor     r9,r12
653         movzx   r13,r8b                 ;final step shifts by 4 bits only
654         shr     r8,4
655         mov     r10,r9
656         shl     r13b,4                  ;index rem_8bit with (shifted-out nibble) << 4
657         shr     r9,4
658         xor     r8,QWORD[8+rcx*1+rsi]
659         movzx   r13,WORD[r13*2+r11]
660         shl     r10,60
661         xor     r9,QWORD[rcx*1+rsi]
662         xor     r8,r10
663         shl     r13,48
664         bswap   r8                      ;byte-swap each half of the result
665         xor     r9,r13
666         bswap   r9
667         cmp     r14,r15                 ;more input? (unsigned pointer compare)
668         jb      NEAR $L$outer_loop
669         mov     QWORD[8+rdi],r8         ;store the final Xi
670         mov     QWORD[rdi],r9
671
672         lea     rsi,[280+rsp]           ;rsi = address of the saved registers (above the locals)
673         mov     r15,QWORD[rsi]          ;restore in reverse push order
674         mov     r14,QWORD[8+rsi]
675         mov     r13,QWORD[16+rsi]
676         mov     r12,QWORD[24+rsi]
677         mov     rbp,QWORD[32+rsi]
678         mov     rbx,QWORD[40+rsi]
679         lea     rsp,[48+rsi]            ;release locals and the six saved registers
680 $L$ghash_epilogue:
681         mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
682         mov     rsi,QWORD[16+rsp]
683         DB      0F3h,0C3h               ;repret
684 $L$SEH_end_gcm_ghash_4bit:
;-----------------------------------------------------------------------
; gcm_init_clmul
; ABI:   Microsoft x64; arguments are used directly from rcx and rdx
; In:    rcx = output table, 96 bytes are written ([rcx] .. [rcx+95])
;        rdx = 16-byte input value H (unaligned load)
; Out:   with H' = the adjusted H computed below and "fold(x)" = x.lo ^ x.hi:
;          [rcx+ 0] = H'
;          [rcx+16] = H'^2   (H' multiplied by itself)
;          [rcx+32] = { fold(H'), fold(H'^2) }      (low qword, high qword)
;          [rcx+48] = H'^3
;          [rcx+64] = H'^4
;          [rcx+80] = { fold(H'^3), fold(H'^4) }
; Stack: 24 bytes, used to preserve xmm6 (callee-saved under this ABI)
; Each multiplication uses three carry-less multiplies (low*low, high*high,
; fold*fold - Karatsuba style) followed by a shift/XOR sequence that brings
; the 256-bit product xmm1:xmm0 back to 128 bits in xmm0.
; Instructions emitted as raw DB bytes are decoded in the trailing comments.
;-----------------------------------------------------------------------
685 global  gcm_init_clmul
686
687 ALIGN   16
688 gcm_init_clmul:
689 $L$_init_clmul:
690 $L$SEH_begin_gcm_init_clmul:
691
692 DB      0x48,0x83,0xec,0x18             ;sub rsp,0x18
693 DB      0x0f,0x29,0x34,0x24             ;movaps [rsp],xmm6
694         movdqu  xmm2,XMMWORD[rdx]       ;load H
695         pshufd  xmm2,xmm2,78            ;swap the two 64-bit halves
696
697
698         pshufd  xmm4,xmm2,255           ;broadcast the top dword
699         movdqa  xmm3,xmm2
700         psllq   xmm2,1                  ;128-bit shift left by 1: shift each half ...
701         pxor    xmm5,xmm5
702         psrlq   xmm3,63
703         pcmpgtd xmm5,xmm4               ;all-ones mask if the top bit of H was set
704         pslldq  xmm3,8                  ;... and carry bit 63 of the low half into the high half
705         por     xmm2,xmm3
706
707
708         pand    xmm5,XMMWORD[$L$0x1c2_polynomial]
709         pxor    xmm2,xmm5               ;conditionally XOR in the polynomial constant -> H'
710
711
712         pshufd  xmm6,xmm2,78
713         movdqa  xmm0,xmm2
714         pxor    xmm6,xmm2               ;xmm6 = fold(H') in both halves
715         movdqa  xmm1,xmm0               ;--- xmm0 = H' * H' ---
716         pshufd  xmm3,xmm0,78
717         pxor    xmm3,xmm0
718 DB      102,15,58,68,194,0              ;pclmulqdq xmm0,xmm2,0x00
719 DB      102,15,58,68,202,17             ;pclmulqdq xmm1,xmm2,0x11
720 DB      102,15,58,68,222,0              ;pclmulqdq xmm3,xmm6,0x00
721         pxor    xmm3,xmm0
722         pxor    xmm3,xmm1               ;xmm3 = middle term
723
724         movdqa  xmm4,xmm3
725         psrldq  xmm3,8
726         pslldq  xmm4,8
727         pxor    xmm1,xmm3               ;merge middle term into xmm1:xmm0
728         pxor    xmm0,xmm4
729
730         movdqa  xmm4,xmm0               ;reduction, first phase (left shifts)
731         movdqa  xmm3,xmm0
732         psllq   xmm0,5
733         pxor    xmm3,xmm0
734         psllq   xmm0,1
735         pxor    xmm0,xmm3
736         psllq   xmm0,57
737         movdqa  xmm3,xmm0
738         pslldq  xmm0,8
739         psrldq  xmm3,8
740         pxor    xmm0,xmm4
741         pxor    xmm1,xmm3
742
743
744         movdqa  xmm4,xmm0               ;reduction, second phase (right shifts)
745         psrlq   xmm0,1
746         pxor    xmm1,xmm4
747         pxor    xmm4,xmm0
748         psrlq   xmm0,5
749         pxor    xmm0,xmm4
750         psrlq   xmm0,1
751         pxor    xmm0,xmm1               ;xmm0 = H'^2
752         pshufd  xmm3,xmm2,78
753         pshufd  xmm4,xmm0,78
754         pxor    xmm3,xmm2               ;fold(H')
755         movdqu  XMMWORD[rcx],xmm2       ;table[0] = H'
756         pxor    xmm4,xmm0               ;fold(H'^2)
757         movdqu  XMMWORD[16+rcx],xmm0    ;table[1] = H'^2
758 DB      102,15,58,15,227,8              ;palignr xmm4,xmm3,8
759         movdqu  XMMWORD[32+rcx],xmm4    ;table[2] = { fold(H'), fold(H'^2) }
760         movdqa  xmm1,xmm0               ;--- xmm0 = H'^2 * H' ---
761         pshufd  xmm3,xmm0,78
762         pxor    xmm3,xmm0
763 DB      102,15,58,68,194,0              ;pclmulqdq xmm0,xmm2,0x00
764 DB      102,15,58,68,202,17             ;pclmulqdq xmm1,xmm2,0x11
765 DB      102,15,58,68,222,0              ;pclmulqdq xmm3,xmm6,0x00
766         pxor    xmm3,xmm0
767         pxor    xmm3,xmm1
768
769         movdqa  xmm4,xmm3
770         psrldq  xmm3,8
771         pslldq  xmm4,8
772         pxor    xmm1,xmm3
773         pxor    xmm0,xmm4
774
775         movdqa  xmm4,xmm0
776         movdqa  xmm3,xmm0
777         psllq   xmm0,5
778         pxor    xmm3,xmm0
779         psllq   xmm0,1
780         pxor    xmm0,xmm3
781         psllq   xmm0,57
782         movdqa  xmm3,xmm0
783         pslldq  xmm0,8
784         psrldq  xmm3,8
785         pxor    xmm0,xmm4
786         pxor    xmm1,xmm3
787
788
789         movdqa  xmm4,xmm0
790         psrlq   xmm0,1
791         pxor    xmm1,xmm4
792         pxor    xmm4,xmm0
793         psrlq   xmm0,5
794         pxor    xmm0,xmm4
795         psrlq   xmm0,1
796         pxor    xmm0,xmm1               ;xmm0 = H'^3
797         movdqa  xmm5,xmm0               ;keep H'^3 for the stores below
798         movdqa  xmm1,xmm0               ;--- xmm0 = H'^3 * H' ---
799         pshufd  xmm3,xmm0,78
800         pxor    xmm3,xmm0
801 DB      102,15,58,68,194,0              ;pclmulqdq xmm0,xmm2,0x00
802 DB      102,15,58,68,202,17             ;pclmulqdq xmm1,xmm2,0x11
803 DB      102,15,58,68,222,0              ;pclmulqdq xmm3,xmm6,0x00
804         pxor    xmm3,xmm0
805         pxor    xmm3,xmm1
806
807         movdqa  xmm4,xmm3
808         psrldq  xmm3,8
809         pslldq  xmm4,8
810         pxor    xmm1,xmm3
811         pxor    xmm0,xmm4
812
813         movdqa  xmm4,xmm0
814         movdqa  xmm3,xmm0
815         psllq   xmm0,5
816         pxor    xmm3,xmm0
817         psllq   xmm0,1
818         pxor    xmm0,xmm3
819         psllq   xmm0,57
820         movdqa  xmm3,xmm0
821         pslldq  xmm0,8
822         psrldq  xmm3,8
823         pxor    xmm0,xmm4
824         pxor    xmm1,xmm3
825
826
827         movdqa  xmm4,xmm0
828         psrlq   xmm0,1
829         pxor    xmm1,xmm4
830         pxor    xmm4,xmm0
831         psrlq   xmm0,5
832         pxor    xmm0,xmm4
833         psrlq   xmm0,1
834         pxor    xmm0,xmm1               ;xmm0 = H'^4
835         pshufd  xmm3,xmm5,78
836         pshufd  xmm4,xmm0,78
837         pxor    xmm3,xmm5               ;fold(H'^3)
838         movdqu  XMMWORD[48+rcx],xmm5    ;table[3] = H'^3
839         pxor    xmm4,xmm0               ;fold(H'^4)
840         movdqu  XMMWORD[64+rcx],xmm0    ;table[4] = H'^4
841 DB      102,15,58,15,227,8              ;palignr xmm4,xmm3,8
842         movdqu  XMMWORD[80+rcx],xmm4    ;table[5] = { fold(H'^3), fold(H'^4) }
843         movaps  xmm6,XMMWORD[rsp]       ;restore callee-saved xmm6
844         lea     rsp,[24+rsp]
845 $L$SEH_end_gcm_init_clmul:
846         DB      0F3h,0C3h               ;repret
847
;-----------------------------------------------------------------------
; gcm_gmult_clmul
; ABI:   Microsoft x64; arguments are used directly from rcx and rdx
; In:    rcx = Xi    - 16-byte block; read, then overwritten with the result
;        rdx = table - 16 bytes at [rdx] and 16 bytes at [rdx+32] are read
;                      (NOTE(review): presumably the table written by
;                      gcm_init_clmul - confirm against the caller)
; Out:   [rcx] = Xi multiplied by the value at [rdx], reduced to 128 bits
; Clobb: xmm0-xmm5 only; no stack is used
; Xi is byte-reversed with $L$bswap_mask on entry and again before the store.
; Instructions emitted as raw DB bytes are decoded in the trailing comments.
;-----------------------------------------------------------------------
848 global  gcm_gmult_clmul
849
850 ALIGN   16
851 gcm_gmult_clmul:
852 $L$_gmult_clmul:
853         movdqu  xmm0,XMMWORD[rcx]       ;xmm0 = Xi
854         movdqa  xmm5,XMMWORD[$L$bswap_mask]
855         movdqu  xmm2,XMMWORD[rdx]       ;multiplier
856         movdqu  xmm4,XMMWORD[32+rdx]    ;its pre-folded halves (only the low qword is used here)
857 DB      102,15,56,0,197                 ;pshufb xmm0,xmm5
858         movdqa  xmm1,xmm0
859         pshufd  xmm3,xmm0,78            ;swap 64-bit halves
860         pxor    xmm3,xmm0               ;xmm3 = Xi.lo ^ Xi.hi
861 DB      102,15,58,68,194,0              ;pclmulqdq xmm0,xmm2,0x00
862 DB      102,15,58,68,202,17             ;pclmulqdq xmm1,xmm2,0x11
863 DB      102,15,58,68,220,0              ;pclmulqdq xmm3,xmm4,0x00
864         pxor    xmm3,xmm0
865         pxor    xmm3,xmm1               ;xmm3 = middle term
866
867         movdqa  xmm4,xmm3
868         psrldq  xmm3,8
869         pslldq  xmm4,8
870         pxor    xmm1,xmm3               ;merge middle term into xmm1:xmm0
871         pxor    xmm0,xmm4
872
873         movdqa  xmm4,xmm0               ;reduction, first phase (left shifts)
874         movdqa  xmm3,xmm0
875         psllq   xmm0,5
876         pxor    xmm3,xmm0
877         psllq   xmm0,1
878         pxor    xmm0,xmm3
879         psllq   xmm0,57
880         movdqa  xmm3,xmm0
881         pslldq  xmm0,8
882         psrldq  xmm3,8
883         pxor    xmm0,xmm4
884         pxor    xmm1,xmm3
885
886
887         movdqa  xmm4,xmm0               ;reduction, second phase (right shifts)
888         psrlq   xmm0,1
889         pxor    xmm1,xmm4
890         pxor    xmm4,xmm0
891         psrlq   xmm0,5
892         pxor    xmm0,xmm4
893         psrlq   xmm0,1
894         pxor    xmm0,xmm1               ;xmm0 = reduced 128-bit product
895 DB      102,15,56,0,197                 ;pshufb xmm0,xmm5
896         movdqu  XMMWORD[rcx],xmm0       ;store the result over Xi
897         DB      0F3h,0C3h               ;repret
898
899 global  gcm_ghash_clmul
900
901 ALIGN   32
902 gcm_ghash_clmul:
903 $L$_ghash_clmul:
904         lea     rax,[((-136))+rsp]
905 $L$SEH_begin_gcm_ghash_clmul:
906
907 DB      0x48,0x8d,0x60,0xe0
908 DB      0x0f,0x29,0x70,0xe0
909 DB      0x0f,0x29,0x78,0xf0
910 DB      0x44,0x0f,0x29,0x00
911 DB      0x44,0x0f,0x29,0x48,0x10
912 DB      0x44,0x0f,0x29,0x50,0x20
913 DB      0x44,0x0f,0x29,0x58,0x30
914 DB      0x44,0x0f,0x29,0x60,0x40
915 DB      0x44,0x0f,0x29,0x68,0x50
916 DB      0x44,0x0f,0x29,0x70,0x60
917 DB      0x44,0x0f,0x29,0x78,0x70
918         movdqa  xmm10,XMMWORD[$L$bswap_mask]
919
920         movdqu  xmm0,XMMWORD[rcx]
921         movdqu  xmm2,XMMWORD[rdx]
922         movdqu  xmm7,XMMWORD[32+rdx]
923 DB      102,65,15,56,0,194
924
925         sub     r9,0x10
926         jz      NEAR $L$odd_tail
927
928         movdqu  xmm6,XMMWORD[16+rdx]
929         mov     eax,DWORD[((OPENSSL_ia32cap_P+4))]
930         cmp     r9,0x30
931         jb      NEAR $L$skip4x
932
933         and     eax,71303168
934         cmp     eax,4194304
935         je      NEAR $L$skip4x
936
937         sub     r9,0x30
938         mov     rax,0xA040608020C0E000
939         movdqu  xmm14,XMMWORD[48+rdx]
940         movdqu  xmm15,XMMWORD[64+rdx]
941
942
943
944
945         movdqu  xmm3,XMMWORD[48+r8]
946         movdqu  xmm11,XMMWORD[32+r8]
947 DB      102,65,15,56,0,218
948 DB      102,69,15,56,0,218
949         movdqa  xmm5,xmm3
950         pshufd  xmm4,xmm3,78
951         pxor    xmm4,xmm3
952 DB      102,15,58,68,218,0
953 DB      102,15,58,68,234,17
954 DB      102,15,58,68,231,0
955
956         movdqa  xmm13,xmm11
957         pshufd  xmm12,xmm11,78
958         pxor    xmm12,xmm11
959 DB      102,68,15,58,68,222,0
960 DB      102,68,15,58,68,238,17
961 DB      102,68,15,58,68,231,16
962         xorps   xmm3,xmm11
963         xorps   xmm5,xmm13
964         movups  xmm7,XMMWORD[80+rdx]
965         xorps   xmm4,xmm12
966
967         movdqu  xmm11,XMMWORD[16+r8]
968         movdqu  xmm8,XMMWORD[r8]
969 DB      102,69,15,56,0,218
970 DB      102,69,15,56,0,194
971         movdqa  xmm13,xmm11
972         pshufd  xmm12,xmm11,78
973         pxor    xmm0,xmm8
974         pxor    xmm12,xmm11
975 DB      102,69,15,58,68,222,0
976         movdqa  xmm1,xmm0
977         pshufd  xmm8,xmm0,78
978         pxor    xmm8,xmm0
979 DB      102,69,15,58,68,238,17
980 DB      102,68,15,58,68,231,0
981         xorps   xmm3,xmm11
982         xorps   xmm5,xmm13
983
984         lea     r8,[64+r8]
985         sub     r9,0x40
986         jc      NEAR $L$tail4x
987
988         jmp     NEAR $L$mod4_loop
989 ALIGN   32
990 $L$mod4_loop:
991 DB      102,65,15,58,68,199,0
992         xorps   xmm4,xmm12
993         movdqu  xmm11,XMMWORD[48+r8]
994 DB      102,69,15,56,0,218
995 DB      102,65,15,58,68,207,17
996         xorps   xmm0,xmm3
997         movdqu  xmm3,XMMWORD[32+r8]
998         movdqa  xmm13,xmm11
999 DB      102,68,15,58,68,199,16
1000         pshufd  xmm12,xmm11,78
1001         xorps   xmm1,xmm5
1002         pxor    xmm12,xmm11
1003 DB      102,65,15,56,0,218
1004         movups  xmm7,XMMWORD[32+rdx]
1005         xorps   xmm8,xmm4
1006 DB      102,68,15,58,68,218,0
1007         pshufd  xmm4,xmm3,78
1008
1009         pxor    xmm8,xmm0
1010         movdqa  xmm5,xmm3
1011         pxor    xmm8,xmm1
1012         pxor    xmm4,xmm3
1013         movdqa  xmm9,xmm8
1014 DB      102,68,15,58,68,234,17
1015         pslldq  xmm8,8
1016         psrldq  xmm9,8
1017         pxor    xmm0,xmm8
1018         movdqa  xmm8,XMMWORD[$L$7_mask]
1019         pxor    xmm1,xmm9
1020 DB      102,76,15,110,200
1021
1022         pand    xmm8,xmm0
1023 DB      102,69,15,56,0,200
1024         pxor    xmm9,xmm0
1025 DB      102,68,15,58,68,231,0
1026         psllq   xmm9,57
1027         movdqa  xmm8,xmm9
1028         pslldq  xmm9,8
1029 DB      102,15,58,68,222,0
1030         psrldq  xmm8,8
1031         pxor    xmm0,xmm9
1032         pxor    xmm1,xmm8
1033         movdqu  xmm8,XMMWORD[r8]
1034
1035         movdqa  xmm9,xmm0
1036         psrlq   xmm0,1
1037 DB      102,15,58,68,238,17
1038         xorps   xmm3,xmm11
1039         movdqu  xmm11,XMMWORD[16+r8]
1040 DB      102,69,15,56,0,218
1041 DB      102,15,58,68,231,16
1042         xorps   xmm5,xmm13
1043         movups  xmm7,XMMWORD[80+rdx]
1044 DB      102,69,15,56,0,194
1045         pxor    xmm1,xmm9
1046         pxor    xmm9,xmm0
1047         psrlq   xmm0,5
1048
1049         movdqa  xmm13,xmm11
1050         pxor    xmm4,xmm12
1051         pshufd  xmm12,xmm11,78
1052         pxor    xmm0,xmm9
1053         pxor    xmm1,xmm8
1054         pxor    xmm12,xmm11
1055 DB      102,69,15,58,68,222,0
1056         psrlq   xmm0,1
1057         pxor    xmm0,xmm1
1058         movdqa  xmm1,xmm0
1059 DB      102,69,15,58,68,238,17
1060         xorps   xmm3,xmm11
1061         pshufd  xmm8,xmm0,78
1062         pxor    xmm8,xmm0
1063
1064 DB      102,68,15,58,68,231,0
1065         xorps   xmm5,xmm13
1066
1067         lea     r8,[64+r8]
1068         sub     r9,0x40
1069         jnc     NEAR $L$mod4_loop
1070
1071 $L$tail4x:
1072 DB      102,65,15,58,68,199,0
1073 DB      102,65,15,58,68,207,17
1074 DB      102,68,15,58,68,199,16
1075         xorps   xmm4,xmm12
1076         xorps   xmm0,xmm3
1077         xorps   xmm1,xmm5
1078         pxor    xmm1,xmm0
1079         pxor    xmm8,xmm4
1080
1081         pxor    xmm8,xmm1
1082         pxor    xmm1,xmm0
1083
1084         movdqa  xmm9,xmm8
1085         psrldq  xmm8,8
1086         pslldq  xmm9,8
1087         pxor    xmm1,xmm8
1088         pxor    xmm0,xmm9
1089
1090         movdqa  xmm4,xmm0
1091         movdqa  xmm3,xmm0
1092         psllq   xmm0,5
1093         pxor    xmm3,xmm0
1094         psllq   xmm0,1
1095         pxor    xmm0,xmm3
1096         psllq   xmm0,57
1097         movdqa  xmm3,xmm0
1098         pslldq  xmm0,8
1099         psrldq  xmm3,8
1100         pxor    xmm0,xmm4
1101         pxor    xmm1,xmm3
1102
1103
1104         movdqa  xmm4,xmm0
1105         psrlq   xmm0,1
1106         pxor    xmm1,xmm4
1107         pxor    xmm4,xmm0
1108         psrlq   xmm0,5
1109         pxor    xmm0,xmm4
1110         psrlq   xmm0,1
1111         pxor    xmm0,xmm1
1112         add     r9,0x40
1113         jz      NEAR $L$done
1114         movdqu  xmm7,XMMWORD[32+rdx]
1115         sub     r9,0x10
1116         jz      NEAR $L$odd_tail
1117 $L$skip4x:
1118
1119
1120
1121
1122
1123         movdqu  xmm8,XMMWORD[r8]
1124         movdqu  xmm3,XMMWORD[16+r8]
1125 DB      102,69,15,56,0,194
1126 DB      102,65,15,56,0,218
1127         pxor    xmm0,xmm8
1128
1129         movdqa  xmm5,xmm3
1130         pshufd  xmm4,xmm3,78
1131         pxor    xmm4,xmm3
1132 DB      102,15,58,68,218,0
1133 DB      102,15,58,68,234,17
1134 DB      102,15,58,68,231,0
1135
1136         lea     r8,[32+r8]
1137         nop
1138         sub     r9,0x20
1139         jbe     NEAR $L$even_tail
1140         nop
1141         jmp     NEAR $L$mod_loop
1142
1143 ALIGN   32
1144 $L$mod_loop:
1145         movdqa  xmm1,xmm0
1146         movdqa  xmm8,xmm4
1147         pshufd  xmm4,xmm0,78
1148         pxor    xmm4,xmm0
1149
1150 DB      102,15,58,68,198,0
1151 DB      102,15,58,68,206,17
1152 DB      102,15,58,68,231,16
1153
1154         pxor    xmm0,xmm3
1155         pxor    xmm1,xmm5
1156         movdqu  xmm9,XMMWORD[r8]
1157         pxor    xmm8,xmm0
1158 DB      102,69,15,56,0,202
1159         movdqu  xmm3,XMMWORD[16+r8]
1160
1161         pxor    xmm8,xmm1
1162         pxor    xmm1,xmm9
1163         pxor    xmm4,xmm8
1164 DB      102,65,15,56,0,218
1165         movdqa  xmm8,xmm4
1166         psrldq  xmm8,8
1167         pslldq  xmm4,8
1168         pxor    xmm1,xmm8
1169         pxor    xmm0,xmm4
1170
1171         movdqa  xmm5,xmm3
1172
1173         movdqa  xmm9,xmm0
1174         movdqa  xmm8,xmm0
1175         psllq   xmm0,5
1176         pxor    xmm8,xmm0
1177 DB      102,15,58,68,218,0
1178         psllq   xmm0,1
1179         pxor    xmm0,xmm8
1180         psllq   xmm0,57
1181         movdqa  xmm8,xmm0
1182         pslldq  xmm0,8
1183         psrldq  xmm8,8
1184         pxor    xmm0,xmm9
1185         pshufd  xmm4,xmm5,78
1186         pxor    xmm1,xmm8
1187         pxor    xmm4,xmm5
1188
1189         movdqa  xmm9,xmm0
1190         psrlq   xmm0,1
1191 DB      102,15,58,68,234,17
1192         pxor    xmm1,xmm9
1193         pxor    xmm9,xmm0
1194         psrlq   xmm0,5
1195         pxor    xmm0,xmm9
1196         lea     r8,[32+r8]
1197         psrlq   xmm0,1
1198 DB      102,15,58,68,231,0
1199         pxor    xmm0,xmm1
1200
1201         sub     r9,0x20
1202         ja      NEAR $L$mod_loop
1203
1204 $L$even_tail:
1205         movdqa  xmm1,xmm0
1206         movdqa  xmm8,xmm4
1207         pshufd  xmm4,xmm0,78
1208         pxor    xmm4,xmm0
1209
1210 DB      102,15,58,68,198,0
1211 DB      102,15,58,68,206,17
1212 DB      102,15,58,68,231,16
1213
1214         pxor    xmm0,xmm3
1215         pxor    xmm1,xmm5
1216         pxor    xmm8,xmm0
1217         pxor    xmm8,xmm1
1218         pxor    xmm4,xmm8
1219         movdqa  xmm8,xmm4
1220         psrldq  xmm8,8
1221         pslldq  xmm4,8
1222         pxor    xmm1,xmm8
1223         pxor    xmm0,xmm4
1224
1225         movdqa  xmm4,xmm0
1226         movdqa  xmm3,xmm0
1227         psllq   xmm0,5
1228         pxor    xmm3,xmm0
1229         psllq   xmm0,1
1230         pxor    xmm0,xmm3
1231         psllq   xmm0,57
1232         movdqa  xmm3,xmm0
1233         pslldq  xmm0,8
1234         psrldq  xmm3,8
1235         pxor    xmm0,xmm4
1236         pxor    xmm1,xmm3
1237
1238
1239         movdqa  xmm4,xmm0
1240         psrlq   xmm0,1
1241         pxor    xmm1,xmm4
1242         pxor    xmm4,xmm0
1243         psrlq   xmm0,5
1244         pxor    xmm0,xmm4
1245         psrlq   xmm0,1
1246         pxor    xmm0,xmm1
1247         test    r9,r9
1248         jnz     NEAR $L$done
1249
1250 $L$odd_tail:
1251         movdqu  xmm8,XMMWORD[r8]
1252 DB      102,69,15,56,0,194
1253         pxor    xmm0,xmm8
1254         movdqa  xmm1,xmm0
1255         pshufd  xmm3,xmm0,78
1256         pxor    xmm3,xmm0
1257 DB      102,15,58,68,194,0
1258 DB      102,15,58,68,202,17
1259 DB      102,15,58,68,223,0
1260         pxor    xmm3,xmm0
1261         pxor    xmm3,xmm1
1262
1263         movdqa  xmm4,xmm3
1264         psrldq  xmm3,8
1265         pslldq  xmm4,8
1266         pxor    xmm1,xmm3
1267         pxor    xmm0,xmm4
1268
1269         movdqa  xmm4,xmm0
1270         movdqa  xmm3,xmm0
1271         psllq   xmm0,5
1272         pxor    xmm3,xmm0
1273         psllq   xmm0,1
1274         pxor    xmm0,xmm3
1275         psllq   xmm0,57
1276         movdqa  xmm3,xmm0
1277         pslldq  xmm0,8
1278         psrldq  xmm3,8
1279         pxor    xmm0,xmm4
1280         pxor    xmm1,xmm3
1281
1282
1283         movdqa  xmm4,xmm0
1284         psrlq   xmm0,1
1285         pxor    xmm1,xmm4
1286         pxor    xmm4,xmm0
1287         psrlq   xmm0,5
1288         pxor    xmm0,xmm4
1289         psrlq   xmm0,1
1290         pxor    xmm0,xmm1
1291 $L$done:
1292 DB      102,65,15,56,0,194
1293         movdqu  XMMWORD[rcx],xmm0
1294         movaps  xmm6,XMMWORD[rsp]
1295         movaps  xmm7,XMMWORD[16+rsp]
1296         movaps  xmm8,XMMWORD[32+rsp]
1297         movaps  xmm9,XMMWORD[48+rsp]
1298         movaps  xmm10,XMMWORD[64+rsp]
1299         movaps  xmm11,XMMWORD[80+rsp]
1300         movaps  xmm12,XMMWORD[96+rsp]
1301         movaps  xmm13,XMMWORD[112+rsp]
1302         movaps  xmm14,XMMWORD[128+rsp]
1303         movaps  xmm15,XMMWORD[144+rsp]
1304         lea     rsp,[168+rsp]
1305 $L$SEH_end_gcm_ghash_clmul:
1306         DB      0F3h,0C3h               ;repret
1307
; gcm_init_avx: exported symbol that contains no logic of its own. Its only
; instruction is an unconditional jump to $L$_init_clmul (a label defined
; outside this view), so every register and stack slot set up by the caller
; reaches that code untouched.
1308 global  gcm_init_avx
1309
1310 ALIGN   32
1311 gcm_init_avx:
1312         jmp     NEAR $L$_init_clmul     ; tail-jump: no call frame is created here
1313
; gcm_gmult_avx: exported symbol that immediately forwards to
; $L$_gmult_clmul (label defined outside this view). Nothing is saved,
; modified or restored before the jump.
1314 global  gcm_gmult_avx
1315
1316 ALIGN   32
1317 gcm_gmult_avx:
1318         jmp     NEAR $L$_gmult_clmul    ; tail-jump: caller's arguments pass through unchanged
1319
; gcm_ghash_avx: exported symbol that immediately forwards to
; $L$_ghash_clmul (label defined outside this view). Nothing is saved,
; modified or restored before the jump.
1320 global  gcm_ghash_avx
1321
1322 ALIGN   32
1323 gcm_ghash_avx:
1324         jmp     NEAR $L$_ghash_clmul    ; tail-jump: caller's arguments pass through unchanged
1325
; 16-byte vector constants, placed on a 64-byte boundary.
1326 ALIGN   64
; Byte indices 15 down to 0. Used as a byte-shuffle control, this ordering
; reverses the 16 bytes of a vector.
; NOTE(review): presumably the pshufb mask for the hand-encoded shuffles in
; the clmul code; the load of this label is outside this view - confirm.
1327 $L$bswap_mask:
1328 DB      15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; 128-bit little-endian value: lowest byte = 1, highest byte = 0xc2, all
; other bytes zero.
1329 $L$0x1c2_polynomial:
1330 DB      1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; Two qwords, each equal to 7. The 4x loop loads this with movdqa and ANDs it
; with xmm0 (pand xmm8,xmm0), keeping the low 3 bits of each 64-bit lane.
1331 $L$7_mask:
1332         DD      7,0,7,0
; Two qwords: 7 and 450 (450 = 0x1c2).
1333 $L$7_mask_poly:
1334         DD      7,0,450,0
1335 ALIGN   64
1336
; $L$rem_4bit: 16 entries of 8 bytes each, written as (low dword, high dword)
; pairs. Every low dword is 0, so each entry is a 64-bit value with only its
; upper 32 bits populated (e.g. entry 1 = 471859200 << 32 = 0x1C20 << 48).
; gcm_gmult_4bit takes the table address with `lea r11,[$L$rem_4bit]` and
; indexes it as QWORD[rdx*8+r11] after masking rdx with 0xf, i.e. by a
; 4-bit value.
1337 $L$rem_4bit:
1338         DD      0,0,0,471859200,0,943718400,0,610271232
1339         DD      0,1887436800,0,1822425088,0,1220542464,0,1423966208
1340         DD      0,3774873600,0,4246732800,0,3644850176,0,3311403008
1341         DD      0,2441084928,0,2376073216,0,2847932416,0,3051356160
1342
; $L$rem_8bit: 256 entries of 16 bits each (32 rows x 8 words), i.e. one
; entry per possible byte value.
; NOTE(review): no reader of this table appears in the visible part of the
; file; presumably it is the byte-indexed counterpart of $L$rem_4bit used by
; the 4-bit GHASH routine - confirm against that routine.
1343 $L$rem_8bit:
1344         DW      0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1345         DW      0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1346         DW      0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1347         DW      0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1348         DW      0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1349         DW      0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1350         DW      0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1351         DW      0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1352         DW      0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1353         DW      0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1354         DW      0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1355         DW      0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1356         DW      0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1357         DW      0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1358         DW      0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1359         DW      0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1360         DW      0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1361         DW      0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1362         DW      0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1363         DW      0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1364         DW      0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1365         DW      0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1366         DW      0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1367         DW      0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1368         DW      0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1369         DW      0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1370         DW      0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1371         DW      0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1372         DW      0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1373         DW      0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1374         DW      0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1375         DW      0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1376
; NUL-terminated ASCII identification string embedded in the object:
;   "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
; It carries no label and is not referenced by any code in this view.
1377 DB      71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1378 DB      44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1379 DB      60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1380 DB      114,103,62,0
1381 ALIGN   64
1382 EXTERN  __imp_RtlVirtualUnwind
1383
; se_handler: exception handler named in the .xdata records of
; gcm_gmult_4bit and gcm_ghash_4bit.
;
; Inputs actually used (Win64 argument registers):
;   r8 = pointer to a register-context record that is read and patched
;   r9 = pointer to a dispatcher record
; NOTE(review): the offsets below match the x64 CONTEXT and
; DISPATCHER_CONTEXT layouts (CONTEXT: 120=Rax 144=Rbx 152=Rsp 160=Rbp
; 168=Rsi 176=Rdi 216=R12 248=Rip; DISPATCHER_CONTEXT: 0=ControlPc
; 8=ImageBase 16=FunctionEntry 24=EstablisherFrame 40=ContextRecord
; 56=HandlerData). The field names used in the comments are that assumed
; mapping - confirm against winnt.h.
;
; What it does: rebuilds the interrupted function's caller-visible
; rbx/rbp/r12/rsi/rdi/rsp inside the context record, copies that record to
; the buffer at [40+r9], calls RtlVirtualUnwind, and returns 1 in eax.
;
; NOTE(review): the stack adjustment is a fixed 24 bytes (three pushes),
; matching the rbx/rbp/r12 pushes of gcm_gmult_4bit. The same handler is also
; registered for gcm_ghash_4bit, whose prologue is outside this view; verify
; that its frame is compatible with this fixed adjustment.
1384 ALIGN   16
1385 se_handler:
1386         push    rsi                     ; save every callee-saved GPR this routine touches
1387         push    rdi
1388         push    rbx
1389         push    rbp
1390         push    r12
1391         push    r13
1392         push    r14
1393         push    r15
1394         pushfq                          ; flags are saved too (the hand-encoded cld below changes DF)
1395         sub     rsp,64                  ; 32-byte shadow space + 4 stack-argument slots; 9 pushes + 64 keeps rsp 16-aligned at the call
1396
1397         mov     rax,QWORD[120+r8]       ; presumably context->Rax
1398         mov     rbx,QWORD[248+r8]       ; presumably context->Rip (address being unwound)
1399
1400         mov     rsi,QWORD[8+r9]         ; presumably disp->ImageBase
1401         mov     r11,QWORD[56+r9]        ; presumably disp->HandlerData (the two DDs after the handler in .xdata)
1402
1403         mov     r10d,DWORD[r11]         ; first handler-data dword: image-relative prologue label
1404         lea     r10,[r10*1+rsi]         ; base + offset -> absolute address
1405         cmp     rbx,r10
1406         jb      NEAR $L$in_prologue     ; below the prologue label: nothing to pop, rax keeps the value read from [120+r8]
1407
1408         mov     rax,QWORD[152+r8]       ; presumably context->Rsp
1409
1410         mov     r10d,DWORD[4+r11]       ; second handler-data dword: image-relative epilogue label
1411         lea     r10,[r10*1+rsi]
1412         cmp     rbx,r10
1413         jae     NEAR $L$in_prologue     ; at/after the epilogue label: registers already popped, use the stack pointer as-is
1414
1415         lea     rax,[24+rax]            ; step over three 8-byte pushed registers
1416
1417         mov     rbx,QWORD[((-8))+rax]   ; first pushed register  (rbx in gcm_gmult_4bit)
1418         mov     rbp,QWORD[((-16))+rax]  ; second pushed register (rbp)
1419         mov     r12,QWORD[((-24))+rax]  ; third pushed register  (r12)
1420         mov     QWORD[144+r8],rbx       ; write recovered values back into the context record
1421         mov     QWORD[160+r8],rbp
1422         mov     QWORD[216+r8],r12
1423
1424 $L$in_prologue:
; Here rax is taken to be the stack pointer at function entry. The WIN64
; prologue of gcm_gmult_4bit stored rdi at [8+rsp] and rsi at [16+rsp] and
; copied rsp into rax, which is what the two loads below rely on.
1425         mov     rdi,QWORD[8+rax]        ; caller's rdi, saved by the prologue
1426         mov     rsi,QWORD[16+rax]       ; caller's rsi, saved by the prologue
1427         mov     QWORD[152+r8],rax       ; patch stack pointer in the context record
1428         mov     QWORD[168+r8],rsi       ; patch rsi
1429         mov     QWORD[176+r8],rdi       ; patch rdi
1430
1431         mov     rdi,QWORD[40+r9]        ; copy destination: presumably disp->ContextRecord
1432         mov     rsi,r8                  ; copy source: the patched context
1433         mov     ecx,154                 ; 154 qwords = 1232 bytes
1434         DD      0xa548f3fc              ; bytes FC F3 48 A5 = cld ; rep movsq
1435
; Argument setup for RtlVirtualUnwind: rcx, rdx, r8, r9, then four stack
; slots above the 32-byte shadow area.
1436         mov     rsi,r9                  ; keep the dispatcher pointer; r9 is overwritten below
1437         xor     rcx,rcx                 ; arg1 = 0
1438         mov     rdx,QWORD[8+rsi]        ; arg2 = [8+disp]
1439         mov     r8,QWORD[rsi]           ; arg3 = [0+disp]
1440         mov     r9,QWORD[16+rsi]        ; arg4 = [16+disp]
1441         mov     r10,QWORD[40+rsi]       ; arg5 = [40+disp] (the buffer just filled)
1442         lea     r11,[56+rsi]            ; arg6 = address of disp+56
1443         lea     r12,[24+rsi]            ; arg7 = address of disp+24
1444         mov     QWORD[32+rsp],r10
1445         mov     QWORD[40+rsp],r11
1446         mov     QWORD[48+rsp],r12
1447         mov     QWORD[56+rsp],rcx       ; arg8 = 0
1448         call    QWORD[__imp_RtlVirtualUnwind]
1449
1450         mov     eax,1                   ; return value 1 (presumably ExceptionContinueSearch - confirm)
1451         add     rsp,64
1452         popfq                           ; restore flags and registers in reverse push order
1453         pop     r15
1454         pop     r14
1455         pop     r13
1456         pop     r12
1457         pop     rbp
1458         pop     rbx
1459         pop     rdi
1460         pop     rsi
1461         DB      0F3h,0C3h               ;repret
1462
1463
; .pdata: one three-dword record per covered function, each dword an
; image-relative address (`wrt ..imagebase`): function begin label, function
; end label, and the label of that function's unwind-info record in .xdata.
; Four functions are registered here: gcm_gmult_4bit, gcm_ghash_4bit,
; gcm_init_clmul and gcm_ghash_clmul.
1464 section .pdata rdata align=4
1465 ALIGN   4
1466         DD      $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
1467         DD      $L$SEH_end_gcm_gmult_4bit wrt ..imagebase
1468         DD      $L$SEH_info_gcm_gmult_4bit wrt ..imagebase
1469
1470         DD      $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
1471         DD      $L$SEH_end_gcm_ghash_4bit wrt ..imagebase
1472         DD      $L$SEH_info_gcm_ghash_4bit wrt ..imagebase
1473
1474         DD      $L$SEH_begin_gcm_init_clmul wrt ..imagebase
1475         DD      $L$SEH_end_gcm_init_clmul wrt ..imagebase
1476         DD      $L$SEH_info_gcm_init_clmul wrt ..imagebase
1477
1478         DD      $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
1479         DD      $L$SEH_end_gcm_ghash_clmul wrt ..imagebase
1480         DD      $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; .xdata: unwind-info records referenced from .pdata. The byte values are
; decoded below per the x64 UNWIND_INFO / UNWIND_CODE format (byte 0 =
; version | flags<<3, byte 1 = prologue size, byte 2 = number of unwind-code
; slots, byte 3 = frame register; each code = prologue offset, then
; op | info<<4, optionally followed by an operand slot).
; Do not edit these bytes without re-deriving them from the matching
; prologue: the offsets are positions of individual prologue instructions.
1481 section .xdata rdata align=8
1482 ALIGN   8
; 4-bit routines: 9 = version 1 with flags = 1 (a handler is present), no
; unwind codes. The record names se_handler and then carries two
; image-relative labels (prologue end, epilogue start), which se_handler
; reads as DWORD[r11] and DWORD[4+r11].
1483 $L$SEH_info_gcm_gmult_4bit:
1484 DB      9,0,0,0
1485         DD      se_handler wrt ..imagebase
1486         DD      $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
1487 $L$SEH_info_gcm_ghash_4bit:
1488 DB      9,0,0,0
1489         DD      se_handler wrt ..imagebase
1490         DD      $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
; gcm_init_clmul: version 1, no handler, 8-byte prologue, 3 code slots.
; NOTE(review): the prologue these codes describe is outside this view.
1491 $L$SEH_info_gcm_init_clmul:
1492 DB      0x01,0x08,0x03,0x00
1493 DB      0x08,0x68,0x00,0x00             ; at offset 8: save xmm6 at [rsp+0]
1494 DB      0x04,0x22,0x00,0x00             ; at offset 4: small stack allocation of 2*8+8 = 24 bytes; trailing 0,0 pads to an even slot count
; gcm_ghash_clmul: version 1, no handler, 0x33-byte prologue, 0x16 = 22 code
; slots (ten 2-slot XMM saves + one 2-slot large allocation). The save
; offsets and the 168-byte allocation mirror the epilogue of gcm_ghash_clmul,
; which reloads xmm6..xmm15 from [rsp+0]..[rsp+144] and then does
; `lea rsp,[168+rsp]`.
1495 $L$SEH_info_gcm_ghash_clmul:
1496 DB      0x01,0x33,0x16,0x00
1497 DB      0x33,0xf8,0x09,0x00             ; save xmm15 at [rsp+9*16 = 144]
1498 DB      0x2e,0xe8,0x08,0x00             ; save xmm14 at [rsp+128]
1499 DB      0x29,0xd8,0x07,0x00             ; save xmm13 at [rsp+112]
1500 DB      0x24,0xc8,0x06,0x00             ; save xmm12 at [rsp+96]
1501 DB      0x1f,0xb8,0x05,0x00             ; save xmm11 at [rsp+80]
1502 DB      0x1a,0xa8,0x04,0x00             ; save xmm10 at [rsp+64]
1503 DB      0x15,0x98,0x03,0x00             ; save xmm9  at [rsp+48]
1504 DB      0x10,0x88,0x02,0x00             ; save xmm8  at [rsp+32]
1505 DB      0x0c,0x78,0x01,0x00             ; save xmm7  at [rsp+16]
1506 DB      0x08,0x68,0x00,0x00             ; save xmm6  at [rsp+0]
1507 DB      0x04,0x01,0x15,0x00             ; large stack allocation of 0x15*8 = 168 bytes