Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / boringssl / win-x86_64 / crypto / aes / bsaes-x86_64.asm
1 OPTION  DOTNAME
2 .text$  SEGMENT ALIGN(256) 'CODE'
3
4 EXTERN  asm_AES_encrypt:NEAR
5 EXTERN  asm_AES_decrypt:NEAR
6
7
8 ALIGN   64
; ---------------------------------------------------------------------------
; _bsaes_encrypt8 (PRIVATE helper, register-based interface)
;
; Transforms the eight 128-bit values held in xmm15, xmm0, xmm1, ..., xmm6
; in parallel.
;
; In:    rax   -> 16-byte-aligned table (read with movdqa / pxor m128):
;                 one 16-byte entry, then 128 bytes per $L$enc_loop pass,
;                 then one final 16-byte entry.
;                 NOTE(review): presumably the schedule produced by
;                 _bsaes_key_convert -- no call site is visible here, confirm.
;        r10d  =  pass counter: decremented once before the first S-box pass
;                 and once after every pass; the routine leaves the loop when
;                 that decrement gives a negative result.
;        xmm15, xmm0-xmm6 = the eight input values.
; Out:   xmm15, xmm0-xmm6 hold the results (the final table entry is XORed
;        into all eight).  NOTE(review): which register holds which block on
;        exit is not evident from this routine -- check the callers.
; Clobb: rax, r10d, r11, xmm7-xmm14, flags.
; Reads constants at $L$BS0+0/16/32/48/64/80 (table defined outside this view).
; ---------------------------------------------------------------------------
9 _bsaes_encrypt8 PROC PRIVATE
10         lea     r11,QWORD PTR[$L$BS0]          ; r11 = base of the constant table
11
12         movdqa  xmm8,XMMWORD PTR[rax]          ; xmm8 = first 16-byte table entry
13         lea     rax,QWORD PTR[16+rax]          ; step rax past it
14         movdqa  xmm7,XMMWORD PTR[80+r11]       ; xmm7 = byte-shuffle control at $L$BS0+80
15         pxor    xmm15,xmm8                     ; XOR the first entry into every value...
16         pxor    xmm0,xmm8
17         pxor    xmm1,xmm8
18         pxor    xmm2,xmm8
19 DB      102,68,15,56,0,255                     ; pshufb xmm15,xmm7 (hand-encoded)
20 DB      102,15,56,0,199                        ; pshufb xmm0,xmm7
21         pxor    xmm3,xmm8
22         pxor    xmm4,xmm8
23 DB      102,15,56,0,207                        ; pshufb xmm1,xmm7
24 DB      102,15,56,0,215                        ; pshufb xmm2,xmm7
25         pxor    xmm5,xmm8
26         pxor    xmm6,xmm8
27 DB      102,15,56,0,223                        ; pshufb xmm3,xmm7
28 DB      102,15,56,0,231                        ; pshufb xmm4,xmm7
29 DB      102,15,56,0,239                        ; pshufb xmm5,xmm7
30 DB      102,15,56,0,247                        ; pshufb xmm6,xmm7
31 _bsaes_encrypt8_bitslice::                     ; '::' label, so it is visible outside this PROC
32         movdqa  xmm7,XMMWORD PTR[r11]          ; mask used with the shift-by-1 swaps
33         movdqa  xmm8,XMMWORD PTR[16+r11]       ; mask used with the shift-by-2 swaps
34         movdqa  xmm9,xmm5                      ; swap step (shift 1), two pairs interleaved:
35         psrlq   xmm5,1                         ;   t = ((a >> 1) ^ b) & mask
36         movdqa  xmm10,xmm3                     ;   b ^= t ; a ^= t << 1
37         psrlq   xmm3,1                         ;   here (a,b) = (xmm5,xmm6) and (xmm3,xmm4)
38         pxor    xmm5,xmm6
39         pxor    xmm3,xmm4
40         pand    xmm5,xmm7
41         pand    xmm3,xmm7
42         pxor    xmm6,xmm5
43         psllq   xmm5,1
44         pxor    xmm4,xmm3
45         psllq   xmm3,1
46         pxor    xmm5,xmm9
47         pxor    xmm3,xmm10
48         movdqa  xmm9,xmm1                      ; same step for (xmm1,xmm2) and (xmm15,xmm0)
49         psrlq   xmm1,1
50         movdqa  xmm10,xmm15
51         psrlq   xmm15,1
52         pxor    xmm1,xmm2
53         pxor    xmm15,xmm0
54         pand    xmm1,xmm7
55         pand    xmm15,xmm7
56         pxor    xmm2,xmm1
57         psllq   xmm1,1
58         pxor    xmm0,xmm15
59         psllq   xmm15,1
60         pxor    xmm1,xmm9
61         pxor    xmm15,xmm10
62         movdqa  xmm7,XMMWORD PTR[32+r11]       ; mask used with the shift-by-4 swaps
63         movdqa  xmm9,xmm4                      ; swap step (shift 2, mask xmm8): (xmm4,xmm6), (xmm3,xmm5)
64         psrlq   xmm4,2
65         movdqa  xmm10,xmm3
66         psrlq   xmm3,2
67         pxor    xmm4,xmm6
68         pxor    xmm3,xmm5
69         pand    xmm4,xmm8
70         pand    xmm3,xmm8
71         pxor    xmm6,xmm4
72         psllq   xmm4,2
73         pxor    xmm5,xmm3
74         psllq   xmm3,2
75         pxor    xmm4,xmm9
76         pxor    xmm3,xmm10
77         movdqa  xmm9,xmm0                      ; shift 2: (xmm0,xmm2), (xmm15,xmm1)
78         psrlq   xmm0,2
79         movdqa  xmm10,xmm15
80         psrlq   xmm15,2
81         pxor    xmm0,xmm2
82         pxor    xmm15,xmm1
83         pand    xmm0,xmm8
84         pand    xmm15,xmm8
85         pxor    xmm2,xmm0
86         psllq   xmm0,2
87         pxor    xmm1,xmm15
88         psllq   xmm15,2
89         pxor    xmm0,xmm9
90         pxor    xmm15,xmm10
91         movdqa  xmm9,xmm2                      ; swap step (shift 4, mask xmm7): (xmm2,xmm6), (xmm1,xmm5)
92         psrlq   xmm2,4
93         movdqa  xmm10,xmm1
94         psrlq   xmm1,4
95         pxor    xmm2,xmm6
96         pxor    xmm1,xmm5
97         pand    xmm2,xmm7
98         pand    xmm1,xmm7
99         pxor    xmm6,xmm2
100         psllq   xmm2,4
101         pxor    xmm5,xmm1
102         psllq   xmm1,4
103         pxor    xmm2,xmm9
104         pxor    xmm1,xmm10
105         movdqa  xmm9,xmm0                     ; shift 4: (xmm0,xmm4), (xmm15,xmm3)
106         psrlq   xmm0,4
107         movdqa  xmm10,xmm15
108         psrlq   xmm15,4
109         pxor    xmm0,xmm4
110         pxor    xmm15,xmm3
111         pand    xmm0,xmm7
112         pand    xmm15,xmm7
113         pxor    xmm4,xmm0
114         psllq   xmm0,4
115         pxor    xmm3,xmm15
116         psllq   xmm15,4
117         pxor    xmm0,xmm9
118         pxor    xmm15,xmm10
119         dec     r10d                          ; pre-decrement the pass counter
120         jmp     $L$enc_sbox                   ; first pass skips the table XOR at $L$enc_loop
121 ALIGN   16
122 $L$enc_loop::
123         pxor    xmm15,XMMWORD PTR[rax]        ; XOR the next eight 16-byte table entries
124         pxor    xmm0,XMMWORD PTR[16+rax]      ; into the eight state registers
125         pxor    xmm1,XMMWORD PTR[32+rax]
126         pxor    xmm2,XMMWORD PTR[48+rax]
127 DB      102,68,15,56,0,255                    ; pshufb xmm15,xmm7 (control chosen at the loop tail)
128 DB      102,15,56,0,199                       ; pshufb xmm0,xmm7
129         pxor    xmm3,XMMWORD PTR[64+rax]
130         pxor    xmm4,XMMWORD PTR[80+rax]
131 DB      102,15,56,0,207                       ; pshufb xmm1,xmm7
132 DB      102,15,56,0,215                       ; pshufb xmm2,xmm7
133         pxor    xmm5,XMMWORD PTR[96+rax]
134         pxor    xmm6,XMMWORD PTR[112+rax]
135 DB      102,15,56,0,223                       ; pshufb xmm3,xmm7
136 DB      102,15,56,0,231                       ; pshufb xmm4,xmm7
137 DB      102,15,56,0,239                       ; pshufb xmm5,xmm7
138 DB      102,15,56,0,247                       ; pshufb xmm6,xmm7
139         lea     rax,QWORD PTR[128+rax]        ; advance to the next 128-byte group
140 $L$enc_sbox::                                 ; branch-free pxor/pand/por network (S-box, going by the label)
141         pxor    xmm4,xmm5
142         pxor    xmm1,xmm0
143         pxor    xmm2,xmm15
144         pxor    xmm5,xmm1
145         pxor    xmm4,xmm15
146
147         pxor    xmm5,xmm2
148         pxor    xmm2,xmm6
149         pxor    xmm6,xmm4
150         pxor    xmm2,xmm3
151         pxor    xmm3,xmm4
152         pxor    xmm2,xmm0
153
154         pxor    xmm1,xmm6
155         pxor    xmm0,xmm4
156         movdqa  xmm10,xmm6                    ; xmm7-xmm14 serve as temporaries from here on
157         movdqa  xmm9,xmm0
158         movdqa  xmm8,xmm4
159         movdqa  xmm12,xmm1
160         movdqa  xmm11,xmm5
161
162         pxor    xmm10,xmm3
163         pxor    xmm9,xmm1
164         pxor    xmm8,xmm2
165         movdqa  xmm13,xmm10
166         pxor    xmm12,xmm3
167         movdqa  xmm7,xmm9
168         pxor    xmm11,xmm15
169         movdqa  xmm14,xmm10
170
171         por     xmm9,xmm8
172         por     xmm10,xmm11
173         pxor    xmm14,xmm7
174         pand    xmm13,xmm11
175         pxor    xmm11,xmm8
176         pand    xmm7,xmm8
177         pand    xmm14,xmm11
178         movdqa  xmm11,xmm2
179         pxor    xmm11,xmm15
180         pand    xmm12,xmm11
181         pxor    xmm10,xmm12
182         pxor    xmm9,xmm12
183         movdqa  xmm12,xmm6
184         movdqa  xmm11,xmm4
185         pxor    xmm12,xmm0
186         pxor    xmm11,xmm5
187         movdqa  xmm8,xmm12
188         pand    xmm12,xmm11
189         por     xmm8,xmm11
190         pxor    xmm7,xmm12
191         pxor    xmm10,xmm14
192         pxor    xmm9,xmm13
193         pxor    xmm8,xmm14
194         movdqa  xmm11,xmm1
195         pxor    xmm7,xmm13
196         movdqa  xmm12,xmm3
197         pxor    xmm8,xmm13
198         movdqa  xmm13,xmm0
199         pand    xmm11,xmm2
200         movdqa  xmm14,xmm6
201         pand    xmm12,xmm15
202         pand    xmm13,xmm4
203         por     xmm14,xmm5
204         pxor    xmm10,xmm11
205         pxor    xmm9,xmm12
206         pxor    xmm8,xmm13
207         pxor    xmm7,xmm14
208
209
210
211
212
213         movdqa  xmm11,xmm10
214         pand    xmm10,xmm8
215         pxor    xmm11,xmm9
216
217         movdqa  xmm13,xmm7
218         movdqa  xmm14,xmm11
219         pxor    xmm13,xmm10
220         pand    xmm14,xmm13
221
222         movdqa  xmm12,xmm8
223         pxor    xmm14,xmm9
224         pxor    xmm12,xmm7
225
226         pxor    xmm10,xmm9
227
228         pand    xmm12,xmm10
229
230         movdqa  xmm9,xmm13
231         pxor    xmm12,xmm7
232
233         pxor    xmm9,xmm12
234         pxor    xmm8,xmm12
235
236         pand    xmm9,xmm7
237
238         pxor    xmm13,xmm9
239         pxor    xmm8,xmm9
240
241         pand    xmm13,xmm14
242
243         pxor    xmm13,xmm11
244         movdqa  xmm11,xmm5
245         movdqa  xmm7,xmm4
246         movdqa  xmm9,xmm14
247         pxor    xmm9,xmm13
248         pand    xmm9,xmm5
249         pxor    xmm5,xmm4
250         pand    xmm4,xmm14
251         pand    xmm5,xmm13
252         pxor    xmm5,xmm4
253         pxor    xmm4,xmm9
254         pxor    xmm11,xmm15
255         pxor    xmm7,xmm2
256         pxor    xmm14,xmm12
257         pxor    xmm13,xmm8
258         movdqa  xmm10,xmm14
259         movdqa  xmm9,xmm12
260         pxor    xmm10,xmm13
261         pxor    xmm9,xmm8
262         pand    xmm10,xmm11
263         pand    xmm9,xmm15
264         pxor    xmm11,xmm7
265         pxor    xmm15,xmm2
266         pand    xmm7,xmm14
267         pand    xmm2,xmm12
268         pand    xmm11,xmm13
269         pand    xmm15,xmm8
270         pxor    xmm7,xmm11
271         pxor    xmm15,xmm2
272         pxor    xmm11,xmm10
273         pxor    xmm2,xmm9
274         pxor    xmm5,xmm11
275         pxor    xmm15,xmm11
276         pxor    xmm4,xmm7
277         pxor    xmm2,xmm7
278
279         movdqa  xmm11,xmm6
280         movdqa  xmm7,xmm0
281         pxor    xmm11,xmm3
282         pxor    xmm7,xmm1
283         movdqa  xmm10,xmm14
284         movdqa  xmm9,xmm12
285         pxor    xmm10,xmm13
286         pxor    xmm9,xmm8
287         pand    xmm10,xmm11
288         pand    xmm9,xmm3
289         pxor    xmm11,xmm7
290         pxor    xmm3,xmm1
291         pand    xmm7,xmm14
292         pand    xmm1,xmm12
293         pand    xmm11,xmm13
294         pand    xmm3,xmm8
295         pxor    xmm7,xmm11
296         pxor    xmm3,xmm1
297         pxor    xmm11,xmm10
298         pxor    xmm1,xmm9
299         pxor    xmm14,xmm12
300         pxor    xmm13,xmm8
301         movdqa  xmm10,xmm14
302         pxor    xmm10,xmm13
303         pand    xmm10,xmm6
304         pxor    xmm6,xmm0
305         pand    xmm0,xmm14
306         pand    xmm6,xmm13
307         pxor    xmm6,xmm0
308         pxor    xmm0,xmm10
309         pxor    xmm6,xmm11
310         pxor    xmm3,xmm11
311         pxor    xmm0,xmm7
312         pxor    xmm1,xmm7
313         pxor    xmm6,xmm15
314         pxor    xmm0,xmm5
315         pxor    xmm3,xmm6
316         pxor    xmm5,xmm15
317         pxor    xmm15,xmm0
318
319         pxor    xmm0,xmm4
320         pxor    xmm4,xmm1
321         pxor    xmm1,xmm2
322         pxor    xmm2,xmm4
323         pxor    xmm3,xmm4
324
325         pxor    xmm5,xmm2
326         dec     r10d                          ; flags from this dec feed both the jl here and the jnz below
327         jl      $L$enc_done                   ; counter went negative: leave the loop
328         pshufd  xmm7,xmm15,093h               ; imm 93h rotates the four dwords by one lane
329         pshufd  xmm8,xmm0,093h                ; NOTE(review): this pshufd/pxor block is presumably MixColumns -- confirm
330         pxor    xmm15,xmm7
331         pshufd  xmm9,xmm3,093h
332         pxor    xmm0,xmm8
333         pshufd  xmm10,xmm5,093h
334         pxor    xmm3,xmm9
335         pshufd  xmm11,xmm2,093h
336         pxor    xmm5,xmm10
337         pshufd  xmm12,xmm6,093h
338         pxor    xmm2,xmm11
339         pshufd  xmm13,xmm1,093h
340         pxor    xmm6,xmm12
341         pshufd  xmm14,xmm4,093h
342         pxor    xmm1,xmm13
343         pxor    xmm4,xmm14
344
345         pxor    xmm8,xmm15
346         pxor    xmm7,xmm4
347         pxor    xmm8,xmm4
348         pshufd  xmm15,xmm15,04Eh              ; imm 4Eh swaps the two 64-bit halves
349         pxor    xmm9,xmm0
350         pshufd  xmm0,xmm0,04Eh
351         pxor    xmm12,xmm2
352         pxor    xmm15,xmm7
353         pxor    xmm13,xmm6
354         pxor    xmm0,xmm8
355         pxor    xmm11,xmm5
356         pshufd  xmm7,xmm2,04Eh
357         pxor    xmm14,xmm1
358         pshufd  xmm8,xmm6,04Eh
359         pxor    xmm10,xmm3
360         pshufd  xmm2,xmm5,04Eh
361         pxor    xmm10,xmm4
362         pshufd  xmm6,xmm4,04Eh
363         pxor    xmm11,xmm4
364         pshufd  xmm5,xmm1,04Eh
365         pxor    xmm7,xmm11
366         pshufd  xmm1,xmm3,04Eh
367         pxor    xmm8,xmm12
368         pxor    xmm2,xmm10
369         pxor    xmm6,xmm14
370         pxor    xmm5,xmm13
371         movdqa  xmm3,xmm7
372         pxor    xmm1,xmm9
373         movdqa  xmm4,xmm8
374         movdqa  xmm7,XMMWORD PTR[48+r11]      ; shuffle control for the next pass; the SSE ops since 'dec' left EFLAGS untouched
375         jnz     $L$enc_loop                   ; counter != 0: not the last pass
376         movdqa  xmm7,XMMWORD PTR[64+r11]      ; counter == 0: last pass uses the control at $L$BS0+64 instead
377         jmp     $L$enc_loop
378 ALIGN   16
379 $L$enc_done::                                 ; same three swap stages (shift 1, 2, 4) on the permuted register set;
380         movdqa  xmm7,XMMWORD PTR[r11]         ; NOTE(review): presumably converts back out of bit-sliced form -- confirm
381         movdqa  xmm8,XMMWORD PTR[16+r11]
382         movdqa  xmm9,xmm1                     ; shift 1: (xmm1,xmm4), (xmm2,xmm6)
383         psrlq   xmm1,1
384         movdqa  xmm10,xmm2
385         psrlq   xmm2,1
386         pxor    xmm1,xmm4
387         pxor    xmm2,xmm6
388         pand    xmm1,xmm7
389         pand    xmm2,xmm7
390         pxor    xmm4,xmm1
391         psllq   xmm1,1
392         pxor    xmm6,xmm2
393         psllq   xmm2,1
394         pxor    xmm1,xmm9
395         pxor    xmm2,xmm10
396         movdqa  xmm9,xmm3                     ; shift 1: (xmm3,xmm5), (xmm15,xmm0)
397         psrlq   xmm3,1
398         movdqa  xmm10,xmm15
399         psrlq   xmm15,1
400         pxor    xmm3,xmm5
401         pxor    xmm15,xmm0
402         pand    xmm3,xmm7
403         pand    xmm15,xmm7
404         pxor    xmm5,xmm3
405         psllq   xmm3,1
406         pxor    xmm0,xmm15
407         psllq   xmm15,1
408         pxor    xmm3,xmm9
409         pxor    xmm15,xmm10
410         movdqa  xmm7,XMMWORD PTR[32+r11]      ; mask for the shift-by-4 stage
411         movdqa  xmm9,xmm6                     ; shift 2: (xmm6,xmm4), (xmm2,xmm1)
412         psrlq   xmm6,2
413         movdqa  xmm10,xmm2
414         psrlq   xmm2,2
415         pxor    xmm6,xmm4
416         pxor    xmm2,xmm1
417         pand    xmm6,xmm8
418         pand    xmm2,xmm8
419         pxor    xmm4,xmm6
420         psllq   xmm6,2
421         pxor    xmm1,xmm2
422         psllq   xmm2,2
423         pxor    xmm6,xmm9
424         pxor    xmm2,xmm10
425         movdqa  xmm9,xmm0                     ; shift 2: (xmm0,xmm5), (xmm15,xmm3)
426         psrlq   xmm0,2
427         movdqa  xmm10,xmm15
428         psrlq   xmm15,2
429         pxor    xmm0,xmm5
430         pxor    xmm15,xmm3
431         pand    xmm0,xmm8
432         pand    xmm15,xmm8
433         pxor    xmm5,xmm0
434         psllq   xmm0,2
435         pxor    xmm3,xmm15
436         psllq   xmm15,2
437         pxor    xmm0,xmm9
438         pxor    xmm15,xmm10
439         movdqa  xmm9,xmm5                     ; shift 4: (xmm5,xmm4), (xmm3,xmm1)
440         psrlq   xmm5,4
441         movdqa  xmm10,xmm3
442         psrlq   xmm3,4
443         pxor    xmm5,xmm4
444         pxor    xmm3,xmm1
445         pand    xmm5,xmm7
446         pand    xmm3,xmm7
447         pxor    xmm4,xmm5
448         psllq   xmm5,4
449         pxor    xmm1,xmm3
450         psllq   xmm3,4
451         pxor    xmm5,xmm9
452         pxor    xmm3,xmm10
453         movdqa  xmm9,xmm0                     ; shift 4: (xmm0,xmm6), (xmm15,xmm2)
454         psrlq   xmm0,4
455         movdqa  xmm10,xmm15
456         psrlq   xmm15,4
457         pxor    xmm0,xmm6
458         pxor    xmm15,xmm2
459         pand    xmm0,xmm7
460         pand    xmm15,xmm7
461         pxor    xmm6,xmm0
462         psllq   xmm0,4
463         pxor    xmm2,xmm15
464         psllq   xmm15,4
465         pxor    xmm0,xmm9
466         pxor    xmm15,xmm10
467         movdqa  xmm7,XMMWORD PTR[rax]         ; final 16-byte table entry
468         pxor    xmm3,xmm7                     ; XOR it into all eight results
469         pxor    xmm5,xmm7
470         pxor    xmm2,xmm7
471         pxor    xmm6,xmm7
472         pxor    xmm1,xmm7
473         pxor    xmm4,xmm7
474         pxor    xmm15,xmm7
475         pxor    xmm0,xmm7
476         DB      0F3h,0C3h               ;repret
477 _bsaes_encrypt8 ENDP
478
479
480 ALIGN   64
; ---------------------------------------------------------------------------
; _bsaes_decrypt8 (PRIVATE helper, register-based interface)
;
; Transforms the eight 128-bit values held in xmm15, xmm0, xmm1, ..., xmm6
; in parallel.  Same overall shape as _bsaes_encrypt8, but with its own
; gate network, its own pshufd/pxor mixing block, and shuffle controls taken
; from below $L$BS0 (offsets -48, -16, -32).
;
; In:    rax   -> 16-byte-aligned table (read with movdqa / pxor m128):
;                 one 16-byte entry, then 128 bytes per $L$dec_loop pass,
;                 then one final 16-byte entry.  The visible caller
;                 (bsaes_cbc_encrypt) passes the stack buffer it had
;                 _bsaes_key_convert fill.
;        r10d  =  pass counter: decremented once before the first S-box pass
;                 and once after every pass; the loop ends when that
;                 decrement gives a negative result.  The visible caller
;                 passes the dword it read from offset 240 of its key
;                 argument (NOTE(review): presumably the AES round count).
;        xmm15, xmm0-xmm6 = the eight input values (the visible caller loads
;                 them from eight consecutive 16-byte slots, in that order).
; Out:   xmm15, xmm0-xmm6 hold the results (the final table entry is XORed
;        into all eight).  NOTE(review): the register-to-block order on exit
;        is not evident from this routine -- check the caller.
; Clobb: rax, r10d, r11, xmm7-xmm14, flags.
; ---------------------------------------------------------------------------
481 _bsaes_decrypt8 PROC PRIVATE
482         lea     r11,QWORD PTR[$L$BS0]         ; r11 = base of the constant table
483
484         movdqa  xmm8,XMMWORD PTR[rax]         ; xmm8 = first 16-byte table entry
485         lea     rax,QWORD PTR[16+rax]         ; step rax past it
486         movdqa  xmm7,XMMWORD PTR[((-48))+r11] ; xmm7 = byte-shuffle control at $L$BS0-48
487         pxor    xmm15,xmm8                    ; XOR the first entry into every value...
488         pxor    xmm0,xmm8
489         pxor    xmm1,xmm8
490         pxor    xmm2,xmm8
491 DB      102,68,15,56,0,255                    ; pshufb xmm15,xmm7 (hand-encoded)
492 DB      102,15,56,0,199                       ; pshufb xmm0,xmm7
493         pxor    xmm3,xmm8
494         pxor    xmm4,xmm8
495 DB      102,15,56,0,207                       ; pshufb xmm1,xmm7
496 DB      102,15,56,0,215                       ; pshufb xmm2,xmm7
497         pxor    xmm5,xmm8
498         pxor    xmm6,xmm8
499 DB      102,15,56,0,223                       ; pshufb xmm3,xmm7
500 DB      102,15,56,0,231                       ; pshufb xmm4,xmm7
501 DB      102,15,56,0,239                       ; pshufb xmm5,xmm7
502 DB      102,15,56,0,247                       ; pshufb xmm6,xmm7
503         movdqa  xmm7,XMMWORD PTR[r11]         ; mask used with the shift-by-1 swaps
504         movdqa  xmm8,XMMWORD PTR[16+r11]      ; mask used with the shift-by-2 swaps
505         movdqa  xmm9,xmm5                     ; swap step (shift 1), two pairs interleaved:
506         psrlq   xmm5,1                        ;   t = ((a >> 1) ^ b) & mask
507         movdqa  xmm10,xmm3                    ;   b ^= t ; a ^= t << 1
508         psrlq   xmm3,1                        ;   here (a,b) = (xmm5,xmm6) and (xmm3,xmm4)
509         pxor    xmm5,xmm6
510         pxor    xmm3,xmm4
511         pand    xmm5,xmm7
512         pand    xmm3,xmm7
513         pxor    xmm6,xmm5
514         psllq   xmm5,1
515         pxor    xmm4,xmm3
516         psllq   xmm3,1
517         pxor    xmm5,xmm9
518         pxor    xmm3,xmm10
519         movdqa  xmm9,xmm1                     ; same step for (xmm1,xmm2) and (xmm15,xmm0)
520         psrlq   xmm1,1
521         movdqa  xmm10,xmm15
522         psrlq   xmm15,1
523         pxor    xmm1,xmm2
524         pxor    xmm15,xmm0
525         pand    xmm1,xmm7
526         pand    xmm15,xmm7
527         pxor    xmm2,xmm1
528         psllq   xmm1,1
529         pxor    xmm0,xmm15
530         psllq   xmm15,1
531         pxor    xmm1,xmm9
532         pxor    xmm15,xmm10
533         movdqa  xmm7,XMMWORD PTR[32+r11]      ; mask used with the shift-by-4 swaps
534         movdqa  xmm9,xmm4                     ; swap step (shift 2, mask xmm8): (xmm4,xmm6), (xmm3,xmm5)
535         psrlq   xmm4,2
536         movdqa  xmm10,xmm3
537         psrlq   xmm3,2
538         pxor    xmm4,xmm6
539         pxor    xmm3,xmm5
540         pand    xmm4,xmm8
541         pand    xmm3,xmm8
542         pxor    xmm6,xmm4
543         psllq   xmm4,2
544         pxor    xmm5,xmm3
545         psllq   xmm3,2
546         pxor    xmm4,xmm9
547         pxor    xmm3,xmm10
548         movdqa  xmm9,xmm0                     ; shift 2: (xmm0,xmm2), (xmm15,xmm1)
549         psrlq   xmm0,2
550         movdqa  xmm10,xmm15
551         psrlq   xmm15,2
552         pxor    xmm0,xmm2
553         pxor    xmm15,xmm1
554         pand    xmm0,xmm8
555         pand    xmm15,xmm8
556         pxor    xmm2,xmm0
557         psllq   xmm0,2
558         pxor    xmm1,xmm15
559         psllq   xmm15,2
560         pxor    xmm0,xmm9
561         pxor    xmm15,xmm10
562         movdqa  xmm9,xmm2                     ; swap step (shift 4, mask xmm7): (xmm2,xmm6), (xmm1,xmm5)
563         psrlq   xmm2,4
564         movdqa  xmm10,xmm1
565         psrlq   xmm1,4
566         pxor    xmm2,xmm6
567         pxor    xmm1,xmm5
568         pand    xmm2,xmm7
569         pand    xmm1,xmm7
570         pxor    xmm6,xmm2
571         psllq   xmm2,4
572         pxor    xmm5,xmm1
573         psllq   xmm1,4
574         pxor    xmm2,xmm9
575         pxor    xmm1,xmm10
576         movdqa  xmm9,xmm0                     ; shift 4: (xmm0,xmm4), (xmm15,xmm3)
577         psrlq   xmm0,4
578         movdqa  xmm10,xmm15
579         psrlq   xmm15,4
580         pxor    xmm0,xmm4
581         pxor    xmm15,xmm3
582         pand    xmm0,xmm7
583         pand    xmm15,xmm7
584         pxor    xmm4,xmm0
585         psllq   xmm0,4
586         pxor    xmm3,xmm15
587         psllq   xmm15,4
588         pxor    xmm0,xmm9
589         pxor    xmm15,xmm10
590         dec     r10d                          ; pre-decrement the pass counter
591         jmp     $L$dec_sbox                   ; first pass skips the table XOR at $L$dec_loop
592 ALIGN   16
593 $L$dec_loop::
594         pxor    xmm15,XMMWORD PTR[rax]        ; XOR the next eight 16-byte table entries
595         pxor    xmm0,XMMWORD PTR[16+rax]      ; into the eight state registers
596         pxor    xmm1,XMMWORD PTR[32+rax]
597         pxor    xmm2,XMMWORD PTR[48+rax]
598 DB      102,68,15,56,0,255                    ; pshufb xmm15,xmm7 (control chosen at the loop tail)
599 DB      102,15,56,0,199                       ; pshufb xmm0,xmm7
600         pxor    xmm3,XMMWORD PTR[64+rax]
601         pxor    xmm4,XMMWORD PTR[80+rax]
602 DB      102,15,56,0,207                       ; pshufb xmm1,xmm7
603 DB      102,15,56,0,215                       ; pshufb xmm2,xmm7
604         pxor    xmm5,XMMWORD PTR[96+rax]
605         pxor    xmm6,XMMWORD PTR[112+rax]
606 DB      102,15,56,0,223                       ; pshufb xmm3,xmm7
607 DB      102,15,56,0,231                       ; pshufb xmm4,xmm7
608 DB      102,15,56,0,239                       ; pshufb xmm5,xmm7
609 DB      102,15,56,0,247                       ; pshufb xmm6,xmm7
610         lea     rax,QWORD PTR[128+rax]        ; advance to the next 128-byte group
611 $L$dec_sbox::                                 ; branch-free pxor/pand/por network (S-box, going by the label)
612         pxor    xmm2,xmm3
613
614         pxor    xmm3,xmm6
615         pxor    xmm1,xmm6
616         pxor    xmm5,xmm3
617         pxor    xmm6,xmm5
618         pxor    xmm0,xmm6
619
620         pxor    xmm15,xmm0
621         pxor    xmm1,xmm4
622         pxor    xmm2,xmm15
623         pxor    xmm4,xmm15
624         pxor    xmm0,xmm2
625         movdqa  xmm10,xmm2                    ; xmm7-xmm14 serve as temporaries from here on
626         movdqa  xmm9,xmm6
627         movdqa  xmm8,xmm0
628         movdqa  xmm12,xmm3
629         movdqa  xmm11,xmm4
630
631         pxor    xmm10,xmm15
632         pxor    xmm9,xmm3
633         pxor    xmm8,xmm5
634         movdqa  xmm13,xmm10
635         pxor    xmm12,xmm15
636         movdqa  xmm7,xmm9
637         pxor    xmm11,xmm1
638         movdqa  xmm14,xmm10
639
640         por     xmm9,xmm8
641         por     xmm10,xmm11
642         pxor    xmm14,xmm7
643         pand    xmm13,xmm11
644         pxor    xmm11,xmm8
645         pand    xmm7,xmm8
646         pand    xmm14,xmm11
647         movdqa  xmm11,xmm5
648         pxor    xmm11,xmm1
649         pand    xmm12,xmm11
650         pxor    xmm10,xmm12
651         pxor    xmm9,xmm12
652         movdqa  xmm12,xmm2
653         movdqa  xmm11,xmm0
654         pxor    xmm12,xmm6
655         pxor    xmm11,xmm4
656         movdqa  xmm8,xmm12
657         pand    xmm12,xmm11
658         por     xmm8,xmm11
659         pxor    xmm7,xmm12
660         pxor    xmm10,xmm14
661         pxor    xmm9,xmm13
662         pxor    xmm8,xmm14
663         movdqa  xmm11,xmm3
664         pxor    xmm7,xmm13
665         movdqa  xmm12,xmm15
666         pxor    xmm8,xmm13
667         movdqa  xmm13,xmm6
668         pand    xmm11,xmm5
669         movdqa  xmm14,xmm2
670         pand    xmm12,xmm1
671         pand    xmm13,xmm0
672         por     xmm14,xmm4
673         pxor    xmm10,xmm11
674         pxor    xmm9,xmm12
675         pxor    xmm8,xmm13
676         pxor    xmm7,xmm14
677
678
679
680
681
682         movdqa  xmm11,xmm10
683         pand    xmm10,xmm8
684         pxor    xmm11,xmm9
685
686         movdqa  xmm13,xmm7
687         movdqa  xmm14,xmm11
688         pxor    xmm13,xmm10
689         pand    xmm14,xmm13
690
691         movdqa  xmm12,xmm8
692         pxor    xmm14,xmm9
693         pxor    xmm12,xmm7
694
695         pxor    xmm10,xmm9
696
697         pand    xmm12,xmm10
698
699         movdqa  xmm9,xmm13
700         pxor    xmm12,xmm7
701
702         pxor    xmm9,xmm12
703         pxor    xmm8,xmm12
704
705         pand    xmm9,xmm7
706
707         pxor    xmm13,xmm9
708         pxor    xmm8,xmm9
709
710         pand    xmm13,xmm14
711
712         pxor    xmm13,xmm11
713         movdqa  xmm11,xmm4
714         movdqa  xmm7,xmm0
715         movdqa  xmm9,xmm14
716         pxor    xmm9,xmm13
717         pand    xmm9,xmm4
718         pxor    xmm4,xmm0
719         pand    xmm0,xmm14
720         pand    xmm4,xmm13
721         pxor    xmm4,xmm0
722         pxor    xmm0,xmm9
723         pxor    xmm11,xmm1
724         pxor    xmm7,xmm5
725         pxor    xmm14,xmm12
726         pxor    xmm13,xmm8
727         movdqa  xmm10,xmm14
728         movdqa  xmm9,xmm12
729         pxor    xmm10,xmm13
730         pxor    xmm9,xmm8
731         pand    xmm10,xmm11
732         pand    xmm9,xmm1
733         pxor    xmm11,xmm7
734         pxor    xmm1,xmm5
735         pand    xmm7,xmm14
736         pand    xmm5,xmm12
737         pand    xmm11,xmm13
738         pand    xmm1,xmm8
739         pxor    xmm7,xmm11
740         pxor    xmm1,xmm5
741         pxor    xmm11,xmm10
742         pxor    xmm5,xmm9
743         pxor    xmm4,xmm11
744         pxor    xmm1,xmm11
745         pxor    xmm0,xmm7
746         pxor    xmm5,xmm7
747
748         movdqa  xmm11,xmm2
749         movdqa  xmm7,xmm6
750         pxor    xmm11,xmm15
751         pxor    xmm7,xmm3
752         movdqa  xmm10,xmm14
753         movdqa  xmm9,xmm12
754         pxor    xmm10,xmm13
755         pxor    xmm9,xmm8
756         pand    xmm10,xmm11
757         pand    xmm9,xmm15
758         pxor    xmm11,xmm7
759         pxor    xmm15,xmm3
760         pand    xmm7,xmm14
761         pand    xmm3,xmm12
762         pand    xmm11,xmm13
763         pand    xmm15,xmm8
764         pxor    xmm7,xmm11
765         pxor    xmm15,xmm3
766         pxor    xmm11,xmm10
767         pxor    xmm3,xmm9
768         pxor    xmm14,xmm12
769         pxor    xmm13,xmm8
770         movdqa  xmm10,xmm14
771         pxor    xmm10,xmm13
772         pand    xmm10,xmm2
773         pxor    xmm2,xmm6
774         pand    xmm6,xmm14
775         pand    xmm2,xmm13
776         pxor    xmm2,xmm6
777         pxor    xmm6,xmm10
778         pxor    xmm2,xmm11
779         pxor    xmm15,xmm11
780         pxor    xmm6,xmm7
781         pxor    xmm3,xmm7
782         pxor    xmm0,xmm6
783         pxor    xmm5,xmm4
784
785         pxor    xmm3,xmm0
786         pxor    xmm1,xmm6
787         pxor    xmm4,xmm6
788         pxor    xmm3,xmm1
789         pxor    xmm6,xmm15
790         pxor    xmm3,xmm4
791         pxor    xmm2,xmm5
792         pxor    xmm5,xmm0
793         pxor    xmm2,xmm3
794
795         pxor    xmm3,xmm15
796         pxor    xmm6,xmm2
797         dec     r10d                          ; flags from this dec feed both the jl here and the jnz below
798         jl      $L$dec_done                   ; counter went negative: leave the loop
799
800         pshufd  xmm7,xmm15,04Eh               ; imm 4Eh swaps the two 64-bit halves
801         pshufd  xmm13,xmm2,04Eh               ; NOTE(review): this pshufd/pxor block is presumably InvMixColumns -- confirm
802         pxor    xmm7,xmm15
803         pshufd  xmm14,xmm4,04Eh
804         pxor    xmm13,xmm2
805         pshufd  xmm8,xmm0,04Eh
806         pxor    xmm14,xmm4
807         pshufd  xmm9,xmm5,04Eh
808         pxor    xmm8,xmm0
809         pshufd  xmm10,xmm3,04Eh
810         pxor    xmm9,xmm5
811         pxor    xmm15,xmm13
812         pxor    xmm0,xmm13
813         pshufd  xmm11,xmm1,04Eh
814         pxor    xmm10,xmm3
815         pxor    xmm5,xmm7
816         pxor    xmm3,xmm8
817         pshufd  xmm12,xmm6,04Eh
818         pxor    xmm11,xmm1
819         pxor    xmm0,xmm14
820         pxor    xmm1,xmm9
821         pxor    xmm12,xmm6
822
823         pxor    xmm5,xmm14
824         pxor    xmm3,xmm13
825         pxor    xmm1,xmm13
826         pxor    xmm6,xmm10
827         pxor    xmm2,xmm11
828         pxor    xmm1,xmm14
829         pxor    xmm6,xmm14
830         pxor    xmm4,xmm12
831         pshufd  xmm7,xmm15,093h               ; imm 93h rotates the four dwords by one lane
832         pshufd  xmm8,xmm0,093h
833         pxor    xmm15,xmm7
834         pshufd  xmm9,xmm5,093h
835         pxor    xmm0,xmm8
836         pshufd  xmm10,xmm3,093h
837         pxor    xmm5,xmm9
838         pshufd  xmm11,xmm1,093h
839         pxor    xmm3,xmm10
840         pshufd  xmm12,xmm6,093h
841         pxor    xmm1,xmm11
842         pshufd  xmm13,xmm2,093h
843         pxor    xmm6,xmm12
844         pshufd  xmm14,xmm4,093h
845         pxor    xmm2,xmm13
846         pxor    xmm4,xmm14
847
848         pxor    xmm8,xmm15
849         pxor    xmm7,xmm4
850         pxor    xmm8,xmm4
851         pshufd  xmm15,xmm15,04Eh
852         pxor    xmm9,xmm0
853         pshufd  xmm0,xmm0,04Eh
854         pxor    xmm12,xmm1
855         pxor    xmm15,xmm7
856         pxor    xmm13,xmm6
857         pxor    xmm0,xmm8
858         pxor    xmm11,xmm3
859         pshufd  xmm7,xmm1,04Eh
860         pxor    xmm14,xmm2
861         pshufd  xmm8,xmm6,04Eh
862         pxor    xmm10,xmm5
863         pshufd  xmm1,xmm3,04Eh
864         pxor    xmm10,xmm4
865         pshufd  xmm6,xmm4,04Eh
866         pxor    xmm11,xmm4
867         pshufd  xmm3,xmm2,04Eh
868         pxor    xmm7,xmm11
869         pshufd  xmm2,xmm5,04Eh
870         pxor    xmm8,xmm12
871         pxor    xmm10,xmm1
872         pxor    xmm6,xmm14
873         pxor    xmm13,xmm3
874         movdqa  xmm3,xmm7
875         pxor    xmm2,xmm9
876         movdqa  xmm5,xmm13
877         movdqa  xmm4,xmm8
878         movdqa  xmm1,xmm2
879         movdqa  xmm2,xmm10
880         movdqa  xmm7,XMMWORD PTR[((-16))+r11] ; shuffle control for the next pass; the SSE ops since 'dec' left EFLAGS untouched
881         jnz     $L$dec_loop                   ; counter != 0: not the last pass
882         movdqa  xmm7,XMMWORD PTR[((-32))+r11] ; counter == 0: last pass uses the control at $L$BS0-32 instead
883         jmp     $L$dec_loop
884 ALIGN   16
885 $L$dec_done::                                 ; same three swap stages (shift 1, 2, 4) on the permuted register set;
886         movdqa  xmm7,XMMWORD PTR[r11]         ; NOTE(review): presumably converts back out of bit-sliced form -- confirm
887         movdqa  xmm8,XMMWORD PTR[16+r11]
888         movdqa  xmm9,xmm2                     ; shift 1: (xmm2,xmm4), (xmm1,xmm6)
889         psrlq   xmm2,1
890         movdqa  xmm10,xmm1
891         psrlq   xmm1,1
892         pxor    xmm2,xmm4
893         pxor    xmm1,xmm6
894         pand    xmm2,xmm7
895         pand    xmm1,xmm7
896         pxor    xmm4,xmm2
897         psllq   xmm2,1
898         pxor    xmm6,xmm1
899         psllq   xmm1,1
900         pxor    xmm2,xmm9
901         pxor    xmm1,xmm10
902         movdqa  xmm9,xmm5                     ; shift 1: (xmm5,xmm3), (xmm15,xmm0)
903         psrlq   xmm5,1
904         movdqa  xmm10,xmm15
905         psrlq   xmm15,1
906         pxor    xmm5,xmm3
907         pxor    xmm15,xmm0
908         pand    xmm5,xmm7
909         pand    xmm15,xmm7
910         pxor    xmm3,xmm5
911         psllq   xmm5,1
912         pxor    xmm0,xmm15
913         psllq   xmm15,1
914         pxor    xmm5,xmm9
915         pxor    xmm15,xmm10
916         movdqa  xmm7,XMMWORD PTR[32+r11]      ; mask for the shift-by-4 stage
917         movdqa  xmm9,xmm6                     ; shift 2: (xmm6,xmm4), (xmm1,xmm2)
918         psrlq   xmm6,2
919         movdqa  xmm10,xmm1
920         psrlq   xmm1,2
921         pxor    xmm6,xmm4
922         pxor    xmm1,xmm2
923         pand    xmm6,xmm8
924         pand    xmm1,xmm8
925         pxor    xmm4,xmm6
926         psllq   xmm6,2
927         pxor    xmm2,xmm1
928         psllq   xmm1,2
929         pxor    xmm6,xmm9
930         pxor    xmm1,xmm10
931         movdqa  xmm9,xmm0                     ; shift 2: (xmm0,xmm3), (xmm15,xmm5)
932         psrlq   xmm0,2
933         movdqa  xmm10,xmm15
934         psrlq   xmm15,2
935         pxor    xmm0,xmm3
936         pxor    xmm15,xmm5
937         pand    xmm0,xmm8
938         pand    xmm15,xmm8
939         pxor    xmm3,xmm0
940         psllq   xmm0,2
941         pxor    xmm5,xmm15
942         psllq   xmm15,2
943         pxor    xmm0,xmm9
944         pxor    xmm15,xmm10
945         movdqa  xmm9,xmm3                     ; shift 4: (xmm3,xmm4), (xmm5,xmm2)
946         psrlq   xmm3,4
947         movdqa  xmm10,xmm5
948         psrlq   xmm5,4
949         pxor    xmm3,xmm4
950         pxor    xmm5,xmm2
951         pand    xmm3,xmm7
952         pand    xmm5,xmm7
953         pxor    xmm4,xmm3
954         psllq   xmm3,4
955         pxor    xmm2,xmm5
956         psllq   xmm5,4
957         pxor    xmm3,xmm9
958         pxor    xmm5,xmm10
959         movdqa  xmm9,xmm0                     ; shift 4: (xmm0,xmm6), (xmm15,xmm1)
960         psrlq   xmm0,4
961         movdqa  xmm10,xmm15
962         psrlq   xmm15,4
963         pxor    xmm0,xmm6
964         pxor    xmm15,xmm1
965         pand    xmm0,xmm7
966         pand    xmm15,xmm7
967         pxor    xmm6,xmm0
968         psllq   xmm0,4
969         pxor    xmm1,xmm15
970         psllq   xmm15,4
971         pxor    xmm0,xmm9
972         pxor    xmm15,xmm10
973         movdqa  xmm7,XMMWORD PTR[rax]         ; final 16-byte table entry
974         pxor    xmm5,xmm7                     ; XOR it into all eight results
975         pxor    xmm3,xmm7
976         pxor    xmm1,xmm7
977         pxor    xmm6,xmm7
978         pxor    xmm2,xmm7
979         pxor    xmm4,xmm7
980         pxor    xmm15,xmm7
981         pxor    xmm0,xmm7
982         DB      0F3h,0C3h               ;repret
983 _bsaes_decrypt8 ENDP
984
985 ALIGN   16
; ---------------------------------------------------------------------------
; _bsaes_key_convert (PRIVATE helper, register-based interface)
;
; Copies the first 16 bytes at [rcx] to [rax] unchanged, then expands each
; following 16-byte source block into eight 16-byte masks (128 bytes): one
; mask per bit position, each byte 0FFh where the tested bit of the shuffled
; source byte is set and 00h otherwise.  Four of the eight masks (output
; offsets 0, 16, 80, 96) are stored inverted.
;
; In:    rcx   -> source, read unaligned in 16-byte blocks.  The visible
;                 caller passes its key argument here.
;        rax   -> 16-byte-aligned output buffer (written with movdqa).
;        r10d  =  count; the loop body runs r10d-1 times.  The visible caller
;                 passes the dword read from offset 240 of its key argument
;                 (NOTE(review): presumably the AES round count).
; Out:   rax   -> just past the 16 + 128*(r10d-1) bytes written.
;        xmm6  =  the 16-byte block loaded last from [rcx]; it is not
;                 expanded here (the visible caller stores it at [rax]).
;        xmm7  =  constant loaded from $L$masks+80 (the visible caller XORs
;                 it with the first output entry).
; Clobb: rcx, r10d, r11, xmm0-xmm5, xmm8-xmm15, flags.
; ---------------------------------------------------------------------------
986 _bsaes_key_convert      PROC PRIVATE
987         lea     r11,QWORD PTR[$L$masks]       ; r11 = constant table (defined outside this view)
988         movdqu  xmm7,XMMWORD PTR[rcx]         ; first source block, kept as-is
989         lea     rcx,QWORD PTR[16+rcx]
990         movdqa  xmm0,XMMWORD PTR[r11]         ; xmm0-xmm3 = four bit-test masks
991         movdqa  xmm1,XMMWORD PTR[16+r11]      ; NOTE(review): presumably per-byte bits 0..3 -- values not visible here
992         movdqa  xmm2,XMMWORD PTR[32+r11]
993         movdqa  xmm3,XMMWORD PTR[48+r11]
994         movdqa  xmm4,XMMWORD PTR[64+r11]      ; xmm4 = pshufb control applied to every source block
995         pcmpeqd xmm5,xmm5                     ; xmm5 = all ones, used to invert via pxor
996
997         movdqu  xmm6,XMMWORD PTR[rcx]         ; preload the first block to expand
998         movdqa  XMMWORD PTR[rax],xmm7         ; store the first block unchanged
999         lea     rax,QWORD PTR[16+rax]
1000         dec     r10d                         ; that block is done; r10d-1 remain
1001         jmp     $L$key_loop                  ; hop over the alignment padding
1002 ALIGN   16
1003 $L$key_loop::
1004 DB      102,15,56,0,244                      ; pshufb xmm6,xmm4 (hand-encoded)
1005
1006         movdqa  xmm8,xmm0
1007         movdqa  xmm9,xmm1
1008
1009         pand    xmm8,xmm6                    ; isolate the tested bit in every byte...
1010         pand    xmm9,xmm6
1011         movdqa  xmm10,xmm2
1012         pcmpeqb xmm8,xmm0                    ; ...and widen it to 0FFh / 00h per byte
1013         psllq   xmm0,4                       ; move each mask up 4 bits for the second group of four tests
1014         movdqa  xmm11,xmm3
1015         pcmpeqb xmm9,xmm1
1016         psllq   xmm1,4
1017
1018         pand    xmm10,xmm6
1019         pand    xmm11,xmm6
1020         movdqa  xmm12,xmm0
1021         pcmpeqb xmm10,xmm2
1022         psllq   xmm2,4
1023         movdqa  xmm13,xmm1
1024         pcmpeqb xmm11,xmm3
1025         psllq   xmm3,4
1026
1027         movdqa  xmm14,xmm2
1028         movdqa  xmm15,xmm3
1029         pxor    xmm8,xmm5                    ; invert result 0
1030         pxor    xmm9,xmm5                    ; invert result 1
1031
1032         pand    xmm12,xmm6
1033         pand    xmm13,xmm6
1034         movdqa  XMMWORD PTR[rax],xmm8
1035         pcmpeqb xmm12,xmm0
1036         psrlq   xmm0,4                       ; restore the mask for the next iteration
1037         movdqa  XMMWORD PTR[16+rax],xmm9
1038         pcmpeqb xmm13,xmm1
1039         psrlq   xmm1,4
1040         lea     rcx,QWORD PTR[16+rcx]        ; advance the source pointer
1041
1042         pand    xmm14,xmm6
1043         pand    xmm15,xmm6
1044         movdqa  XMMWORD PTR[32+rax],xmm10
1045         pcmpeqb xmm14,xmm2
1046         psrlq   xmm2,4
1047         movdqa  XMMWORD PTR[48+rax],xmm11
1048         pcmpeqb xmm15,xmm3
1049         psrlq   xmm3,4
1050         movdqu  xmm6,XMMWORD PTR[rcx]        ; preload the next source block (after the last pass it is returned in xmm6)
1051
1052         pxor    xmm13,xmm5                   ; invert result 5
1053         pxor    xmm14,xmm5                   ; invert result 6
1054         movdqa  XMMWORD PTR[64+rax],xmm12
1055         movdqa  XMMWORD PTR[80+rax],xmm13
1056         movdqa  XMMWORD PTR[96+rax],xmm14
1057         movdqa  XMMWORD PTR[112+rax],xmm15
1058         lea     rax,QWORD PTR[128+rax]       ; advance the output pointer by one 128-byte group
1059         dec     r10d
1060         jnz     $L$key_loop
1061
1062         movdqa  xmm7,XMMWORD PTR[80+r11]     ; constant handed back to the caller in xmm7
1063
1064         DB      0F3h,0C3h               ;repret
1065 _bsaes_key_convert      ENDP
1066 EXTERN  asm_AES_cbc_encrypt:NEAR
1067 PUBLIC  bsaes_cbc_encrypt
1068
1069 ALIGN   16
;-----------------------------------------------------------------------
; bsaes_cbc_encrypt -- CBC decryption, eight blocks per _bsaes_decrypt8 call
;
; ABI: Microsoft x64.
;   rcx       input pointer            (kept in r12, only read)
;   rdx       output pointer           (kept in r13, only written)
;   r8        length in bytes          (kept in r14 as a 16-byte block count)
;   r9        key structure            (kept in r15; the DWORD at offset 240
;                                       is used as the round count --
;                                       presumably AES_KEY.rounds, TODO confirm)
;   [40+rsp]  fifth argument: pointer to the 16-byte IV (kept in rbx);
;             read once at the start, overwritten with the last input
;             block before returning
;   [48+rsp]  sixth argument: DWORD flag (presumably "enc" -- TODO confirm)
;
; The call is forwarded unchanged (tail jump) to asm_AES_cbc_encrypt when
; the flag is non-zero or when r8 < 128.  Only whole 16-byte blocks are
; processed here; r8 is shifted right by 4 and the remainder is dropped.
;
; Frame, relative to rbp once $L$cbc_dec_body is reached:
;   [rbp+32, rbp+48)    scratch: chaining value / single-block output
;   [rbp+64, rbp+224)   saved xmm6-xmm15
;   below rbp           converted key schedule, rounds*128-96 bytes,
;                       zeroed again at $L$cbc_dec_bzero
;
; Live across the loop: r12 in, r13 out, r14 blocks left, r15 key,
; rbx IV pointer, edx round count, xmm14 chaining value.
;-----------------------------------------------------------------------
1070 bsaes_cbc_encrypt       PROC PUBLIC
             ; Non-zero sixth argument, or fewer than 128 bytes: let
             ; asm_AES_cbc_encrypt handle the whole request.
1071         mov     r11d,DWORD PTR[48+rsp]
1072         cmp     r11d,0
1073         jne     asm_AES_cbc_encrypt
1074         cmp     r8,128
1075         jb      asm_AES_cbc_encrypt
1076
             ; rax = entry rsp.  It is overwritten at $L$cbc_dec_body without
             ; being read in this procedure; presumably it is there for an
             ; unwind handler defined outside this block -- TODO confirm.
1077         mov     rax,rsp
1078 $L$cbc_dec_prologue::
1079         push    rbp
1080         push    rbx
1081         push    r12
1082         push    r13
1083         push    r14
1084         push    r15
             ; 48 bytes pushed + 72 reserved: [160+rsp] is [40+entry rsp],
             ; i.e. the fifth argument slot.  Then 160 more bytes for xmm6-15.
1085         lea     rsp,QWORD PTR[((-72))+rsp]
1086         mov     r10,QWORD PTR[160+rsp]
1087         lea     rsp,QWORD PTR[((-160))+rsp]
1088         movaps  XMMWORD PTR[64+rsp],xmm6
1089         movaps  XMMWORD PTR[80+rsp],xmm7
1090         movaps  XMMWORD PTR[96+rsp],xmm8
1091         movaps  XMMWORD PTR[112+rsp],xmm9
1092         movaps  XMMWORD PTR[128+rsp],xmm10
1093         movaps  XMMWORD PTR[144+rsp],xmm11
1094         movaps  XMMWORD PTR[160+rsp],xmm12
1095         movaps  XMMWORD PTR[176+rsp],xmm13
1096         movaps  XMMWORD PTR[192+rsp],xmm14
1097         movaps  XMMWORD PTR[208+rsp],xmm15
1098 $L$cbc_dec_body::
             ; rbp marks the frame base; move the arguments into registers
             ; that survive the calls below and turn the length into blocks.
1099         mov     rbp,rsp
1100         mov     eax,DWORD PTR[240+r9]
1101         mov     r12,rcx
1102         mov     r13,rdx
1103         mov     r14,r8
1104         mov     r15,r9
1105         mov     rbx,r10
1106         shr     r14,4
1107
             ; Reserve rounds*128 - 96 bytes below rbp for the converted key
             ; schedule; edx keeps the round count for later calls.
1108         mov     edx,eax
1109         shl     rax,7
1110         sub     rax,96
1111         sub     rsp,rax
1112
             ; _bsaes_key_convert takes rax = destination, rcx = key,
             ; r10d = round count.  When it returns, rax points just past the
             ; last block it wrote, xmm6 holds the last 16 bytes it loaded
             ; from the key and xmm7 the constant it loaded from 80+r11.
             ; The three instructions after the call XOR that constant into
             ; the first 16 bytes of the buffer and store xmm6 at the end.
1113         mov     rax,rsp
1114         mov     rcx,r15
1115         mov     r10d,edx
1116         call    _bsaes_key_convert
1117         pxor    xmm7,XMMWORD PTR[rsp]
1118         movdqa  XMMWORD PTR[rax],xmm6
1119         movdqa  XMMWORD PTR[rsp],xmm7
1120
             ; xmm14 = IV.  r14 is biased by -8 so that the borrow from
             ; "sub r14,8" ends the loop; r8 >= 128 guarantees one full pass.
1121         movdqu  xmm14,XMMWORD PTR[rbx]
1122         sub     r14,8
1123 $L$cbc_dec_loop::
             ; Load eight input blocks into xmm15, xmm0..xmm6 and park the
             ; chaining value at [32+rbp] while _bsaes_decrypt8 runs
             ; (rax = key schedule, r10d = round count).
1124         movdqu  xmm15,XMMWORD PTR[r12]
1125         movdqu  xmm0,XMMWORD PTR[16+r12]
1126         movdqu  xmm1,XMMWORD PTR[32+r12]
1127         movdqu  xmm2,XMMWORD PTR[48+r12]
1128         movdqu  xmm3,XMMWORD PTR[64+r12]
1129         movdqu  xmm4,XMMWORD PTR[80+r12]
1130         mov     rax,rsp
1131         movdqu  xmm5,XMMWORD PTR[96+r12]
1132         mov     r10d,edx
1133         movdqu  xmm6,XMMWORD PTR[112+r12]
1134         movdqa  XMMWORD PTR[32+rbp],xmm14
1135
1136         call    _bsaes_decrypt8
1137
             ; The results for blocks 0..7 are taken from xmm15, xmm0, xmm5,
             ; xmm3, xmm1, xmm6, xmm2, xmm4 (see the store order below).
             ; Block 0 is XORed with the saved chaining value, block i with
             ; input block i-1, which is re-read from the input buffer.  All
             ; re-reads finish before the first store, so in == out works.
             ; xmm14 ends up holding input block 7, the next chaining value.
1138         pxor    xmm15,XMMWORD PTR[32+rbp]
1139         movdqu  xmm7,XMMWORD PTR[r12]
1140         movdqu  xmm8,XMMWORD PTR[16+r12]
1141         pxor    xmm0,xmm7
1142         movdqu  xmm9,XMMWORD PTR[32+r12]
1143         pxor    xmm5,xmm8
1144         movdqu  xmm10,XMMWORD PTR[48+r12]
1145         pxor    xmm3,xmm9
1146         movdqu  xmm11,XMMWORD PTR[64+r12]
1147         pxor    xmm1,xmm10
1148         movdqu  xmm12,XMMWORD PTR[80+r12]
1149         pxor    xmm6,xmm11
1150         movdqu  xmm13,XMMWORD PTR[96+r12]
1151         pxor    xmm2,xmm12
1152         movdqu  xmm14,XMMWORD PTR[112+r12]
1153         pxor    xmm4,xmm13
1154         movdqu  XMMWORD PTR[r13],xmm15
1155         lea     r12,QWORD PTR[128+r12]
1156         movdqu  XMMWORD PTR[16+r13],xmm0
1157         movdqu  XMMWORD PTR[32+r13],xmm5
1158         movdqu  XMMWORD PTR[48+r13],xmm3
1159         movdqu  XMMWORD PTR[64+r13],xmm1
1160         movdqu  XMMWORD PTR[80+r13],xmm6
1161         movdqu  XMMWORD PTR[96+r13],xmm2
1162         movdqu  XMMWORD PTR[112+r13],xmm4
1163         lea     r13,QWORD PTR[128+r13]
1164         sub     r14,8
1165         jnc     $L$cbc_dec_loop
1166
             ; Undo the bias: r14 = blocks still to do, 0..7.
1167         add     r14,8
1168         jz      $L$cbc_dec_done
1169
             ; Tail dispatch.  Blocks are loaded one by one while r14 is
             ; compared against 2, 4 and 6; each jb/je pair reuses the flags
             ; of one cmp because the movdqu in between leaves them alone.
             ; Falling through every branch is the seven-block case.
1170         movdqu  xmm15,XMMWORD PTR[r12]
1171         mov     rax,rsp
1172         mov     r10d,edx
1173         cmp     r14,2
1174         jb      $L$cbc_dec_one
1175         movdqu  xmm0,XMMWORD PTR[16+r12]
1176         je      $L$cbc_dec_two
1177         movdqu  xmm1,XMMWORD PTR[32+r12]
1178         cmp     r14,4
1179         jb      $L$cbc_dec_three
1180         movdqu  xmm2,XMMWORD PTR[48+r12]
1181         je      $L$cbc_dec_four
1182         movdqu  xmm3,XMMWORD PTR[64+r12]
1183         cmp     r14,6
1184         jb      $L$cbc_dec_five
1185         movdqu  xmm4,XMMWORD PTR[80+r12]
1186         je      $L$cbc_dec_six
             ; Seven blocks.  xmm6 was not reloaded; the eighth result
             ; (xmm4) is simply not stored.  xmm14 = input block 6.
1187         movdqu  xmm5,XMMWORD PTR[96+r12]
1188         movdqa  XMMWORD PTR[32+rbp],xmm14
1189         call    _bsaes_decrypt8
1190         pxor    xmm15,XMMWORD PTR[32+rbp]
1191         movdqu  xmm7,XMMWORD PTR[r12]
1192         movdqu  xmm8,XMMWORD PTR[16+r12]
1193         pxor    xmm0,xmm7
1194         movdqu  xmm9,XMMWORD PTR[32+r12]
1195         pxor    xmm5,xmm8
1196         movdqu  xmm10,XMMWORD PTR[48+r12]
1197         pxor    xmm3,xmm9
1198         movdqu  xmm11,XMMWORD PTR[64+r12]
1199         pxor    xmm1,xmm10
1200         movdqu  xmm12,XMMWORD PTR[80+r12]
1201         pxor    xmm6,xmm11
1202         movdqu  xmm14,XMMWORD PTR[96+r12]
1203         pxor    xmm2,xmm12
1204         movdqu  XMMWORD PTR[r13],xmm15
1205         movdqu  XMMWORD PTR[16+r13],xmm0
1206         movdqu  XMMWORD PTR[32+r13],xmm5
1207         movdqu  XMMWORD PTR[48+r13],xmm3
1208         movdqu  XMMWORD PTR[64+r13],xmm1
1209         movdqu  XMMWORD PTR[80+r13],xmm6
1210         movdqu  XMMWORD PTR[96+r13],xmm2
1211         jmp     $L$cbc_dec_done
1212 ALIGN   16
1213 $L$cbc_dec_six::
             ; Six blocks: results in xmm15, xmm0, xmm5, xmm3, xmm1, xmm6;
             ; xmm14 = input block 5.
1214         movdqa  XMMWORD PTR[32+rbp],xmm14
1215         call    _bsaes_decrypt8
1216         pxor    xmm15,XMMWORD PTR[32+rbp]
1217         movdqu  xmm7,XMMWORD PTR[r12]
1218         movdqu  xmm8,XMMWORD PTR[16+r12]
1219         pxor    xmm0,xmm7
1220         movdqu  xmm9,XMMWORD PTR[32+r12]
1221         pxor    xmm5,xmm8
1222         movdqu  xmm10,XMMWORD PTR[48+r12]
1223         pxor    xmm3,xmm9
1224         movdqu  xmm11,XMMWORD PTR[64+r12]
1225         pxor    xmm1,xmm10
1226         movdqu  xmm14,XMMWORD PTR[80+r12]
1227         pxor    xmm6,xmm11
1228         movdqu  XMMWORD PTR[r13],xmm15
1229         movdqu  XMMWORD PTR[16+r13],xmm0
1230         movdqu  XMMWORD PTR[32+r13],xmm5
1231         movdqu  XMMWORD PTR[48+r13],xmm3
1232         movdqu  XMMWORD PTR[64+r13],xmm1
1233         movdqu  XMMWORD PTR[80+r13],xmm6
1234         jmp     $L$cbc_dec_done
1235 ALIGN   16
1236 $L$cbc_dec_five::
             ; Five blocks: results in xmm15, xmm0, xmm5, xmm3, xmm1;
             ; xmm14 = input block 4.
1237         movdqa  XMMWORD PTR[32+rbp],xmm14
1238         call    _bsaes_decrypt8
1239         pxor    xmm15,XMMWORD PTR[32+rbp]
1240         movdqu  xmm7,XMMWORD PTR[r12]
1241         movdqu  xmm8,XMMWORD PTR[16+r12]
1242         pxor    xmm0,xmm7
1243         movdqu  xmm9,XMMWORD PTR[32+r12]
1244         pxor    xmm5,xmm8
1245         movdqu  xmm10,XMMWORD PTR[48+r12]
1246         pxor    xmm3,xmm9
1247         movdqu  xmm14,XMMWORD PTR[64+r12]
1248         pxor    xmm1,xmm10
1249         movdqu  XMMWORD PTR[r13],xmm15
1250         movdqu  XMMWORD PTR[16+r13],xmm0
1251         movdqu  XMMWORD PTR[32+r13],xmm5
1252         movdqu  XMMWORD PTR[48+r13],xmm3
1253         movdqu  XMMWORD PTR[64+r13],xmm1
1254         jmp     $L$cbc_dec_done
1255 ALIGN   16
1256 $L$cbc_dec_four::
             ; Four blocks: results in xmm15, xmm0, xmm5, xmm3;
             ; xmm14 = input block 3.
1257         movdqa  XMMWORD PTR[32+rbp],xmm14
1258         call    _bsaes_decrypt8
1259         pxor    xmm15,XMMWORD PTR[32+rbp]
1260         movdqu  xmm7,XMMWORD PTR[r12]
1261         movdqu  xmm8,XMMWORD PTR[16+r12]
1262         pxor    xmm0,xmm7
1263         movdqu  xmm9,XMMWORD PTR[32+r12]
1264         pxor    xmm5,xmm8
1265         movdqu  xmm14,XMMWORD PTR[48+r12]
1266         pxor    xmm3,xmm9
1267         movdqu  XMMWORD PTR[r13],xmm15
1268         movdqu  XMMWORD PTR[16+r13],xmm0
1269         movdqu  XMMWORD PTR[32+r13],xmm5
1270         movdqu  XMMWORD PTR[48+r13],xmm3
1271         jmp     $L$cbc_dec_done
1272 ALIGN   16
1273 $L$cbc_dec_three::
             ; Three blocks: results in xmm15, xmm0, xmm5;
             ; xmm14 = input block 2.
1274         movdqa  XMMWORD PTR[32+rbp],xmm14
1275         call    _bsaes_decrypt8
1276         pxor    xmm15,XMMWORD PTR[32+rbp]
1277         movdqu  xmm7,XMMWORD PTR[r12]
1278         movdqu  xmm8,XMMWORD PTR[16+r12]
1279         pxor    xmm0,xmm7
1280         movdqu  xmm14,XMMWORD PTR[32+r12]
1281         pxor    xmm5,xmm8
1282         movdqu  XMMWORD PTR[r13],xmm15
1283         movdqu  XMMWORD PTR[16+r13],xmm0
1284         movdqu  XMMWORD PTR[32+r13],xmm5
1285         jmp     $L$cbc_dec_done
1286 ALIGN   16
1287 $L$cbc_dec_two::
             ; Two blocks: results in xmm15, xmm0; xmm14 = input block 1.
1288         movdqa  XMMWORD PTR[32+rbp],xmm14
1289         call    _bsaes_decrypt8
1290         pxor    xmm15,XMMWORD PTR[32+rbp]
1291         movdqu  xmm7,XMMWORD PTR[r12]
1292         movdqu  xmm14,XMMWORD PTR[16+r12]
1293         pxor    xmm0,xmm7
1294         movdqu  XMMWORD PTR[r13],xmm15
1295         movdqu  XMMWORD PTR[16+r13],xmm0
1296         jmp     $L$cbc_dec_done
1297 ALIGN   16
1298 $L$cbc_dec_one::
             ; One block: asm_AES_decrypt(rcx = input, rdx = scratch at
             ; [32+rbp], r8 = original key in r15).  The chaining value stays
             ; in xmm14 and the input block in xmm15 across the call; this
             ; relies on xmm6-xmm15 being callee-saved under the Microsoft
             ; x64 convention.  The scratch result is XORed into xmm14 and
             ; stored, then xmm14 takes the input block as the new value.
1299         lea     rcx,QWORD PTR[r12]
1300         lea     rdx,QWORD PTR[32+rbp]
1301         lea     r8,QWORD PTR[r15]
1302         call    asm_AES_decrypt
1303         pxor    xmm14,XMMWORD PTR[32+rbp]
1304         movdqu  XMMWORD PTR[r13],xmm14
1305         movdqa  xmm14,xmm15
1306
1307 $L$cbc_dec_done::
             ; Hand the final chaining value back through the IV pointer,
             ; then zero everything between rsp and rbp (the converted key
             ; schedule) in 32-byte steps.
1308         movdqu  XMMWORD PTR[rbx],xmm14
1309         lea     rax,QWORD PTR[rsp]
1310         pxor    xmm0,xmm0
1311 $L$cbc_dec_bzero::
1312         movdqa  XMMWORD PTR[rax],xmm0
1313         movdqa  XMMWORD PTR[16+rax],xmm0
1314         lea     rax,QWORD PTR[32+rax]
1315         cmp     rbp,rax
1316         ja      $L$cbc_dec_bzero
1317
             ; Drop the key schedule, restore xmm6-xmm15, then reload the six
             ; pushed registers by offset instead of popping them.  The saved
             ; rbp travels through rax and rsp ends at its entry value.
1318         lea     rsp,QWORD PTR[rbp]
1319         movaps  xmm6,XMMWORD PTR[64+rbp]
1320         movaps  xmm7,XMMWORD PTR[80+rbp]
1321         movaps  xmm8,XMMWORD PTR[96+rbp]
1322         movaps  xmm9,XMMWORD PTR[112+rbp]
1323         movaps  xmm10,XMMWORD PTR[128+rbp]
1324         movaps  xmm11,XMMWORD PTR[144+rbp]
1325         movaps  xmm12,XMMWORD PTR[160+rbp]
1326         movaps  xmm13,XMMWORD PTR[176+rbp]
1327         movaps  xmm14,XMMWORD PTR[192+rbp]
1328         movaps  xmm15,XMMWORD PTR[208+rbp]
1329         lea     rsp,QWORD PTR[160+rbp]
1330         mov     r15,QWORD PTR[72+rsp]
1331         mov     r14,QWORD PTR[80+rsp]
1332         mov     r13,QWORD PTR[88+rsp]
1333         mov     r12,QWORD PTR[96+rsp]
1334         mov     rbx,QWORD PTR[104+rsp]
1335         mov     rax,QWORD PTR[112+rsp]
1336         lea     rsp,QWORD PTR[120+rsp]
1337         mov     rbp,rax
1338 $L$cbc_dec_epilogue::
1339         DB      0F3h,0C3h               ;repret
1340 bsaes_cbc_encrypt       ENDP
1341
1342 PUBLIC  bsaes_ctr32_encrypt_blocks
1343
1344 ALIGN   16
;-----------------------------------------------------------------------
; bsaes_ctr32_encrypt_blocks -- CTR-mode keystream XOR, eight blocks per
; pass through the bit-sliced core, one block at a time below eight.
;
; ABI: Microsoft x64.
;   rcx       input pointer            (kept in r12, only read)
;   rdx       output pointer           (kept in r13, only written)
;   r8        number of 16-byte blocks (kept in r14)
;   r9        key structure            (kept in r15; the DWORD at offset 240
;                                       is used as the round count --
;                                       presumably AES_KEY.rounds, TODO confirm)
;   [40+rsp]  fifth argument: pointer to the 16-byte counter block.  It is
;             only read; the advanced counter is not written back.
;
; A block count of zero returns immediately without touching memory.
; (Without this guard the "fewer than 8" path would process one block
; and then wrap r14 from 0 to 2^64-1.)
;
; Frame, relative to rbp once $L$ctr_enc_body is reached:
;   [rbp+32, rbp+48)    working copy of the counter block
;   [rbp+48, rbp+64)    one keystream block (short path only)
;   [rbp+64, rbp+224)   saved xmm6-xmm15
;   below rbp           converted key schedule, rounds*128-96 bytes
;                       (bulk path only), zeroed at $L$ctr_enc_bzero
;
; Live across the loops: r12 in, r13 out, r14 blocks left, r15 key,
; ebx round count (bulk path), xmm15 counter (bulk path).
;-----------------------------------------------------------------------
1345 bsaes_ctr32_encrypt_blocks      PROC PUBLIC
             ; Nothing to do for zero blocks.  Taken before the prologue, so
             ; rsp and every callee-saved register are still untouched.
             test    r8,r8
             jz      $L$ctr_enc_epilogue
             ; rax = entry rsp.  It is overwritten at $L$ctr_enc_body without
             ; being read in this procedure; presumably it is there for an
             ; unwind handler defined outside this block -- TODO confirm.
1346         mov     rax,rsp
1347 $L$ctr_enc_prologue::
1348         push    rbp
1349         push    rbx
1350         push    r12
1351         push    r13
1352         push    r14
1353         push    r15
             ; 48 bytes pushed + 72 reserved: [160+rsp] is [40+entry rsp],
             ; i.e. the fifth argument slot.  Then 160 more bytes for xmm6-15.
1354         lea     rsp,QWORD PTR[((-72))+rsp]
1355         mov     r10,QWORD PTR[160+rsp]
1356         lea     rsp,QWORD PTR[((-160))+rsp]
1357         movaps  XMMWORD PTR[64+rsp],xmm6
1358         movaps  XMMWORD PTR[80+rsp],xmm7
1359         movaps  XMMWORD PTR[96+rsp],xmm8
1360         movaps  XMMWORD PTR[112+rsp],xmm9
1361         movaps  XMMWORD PTR[128+rsp],xmm10
1362         movaps  XMMWORD PTR[144+rsp],xmm11
1363         movaps  XMMWORD PTR[160+rsp],xmm12
1364         movaps  XMMWORD PTR[176+rsp],xmm13
1365         movaps  XMMWORD PTR[192+rsp],xmm14
1366         movaps  XMMWORD PTR[208+rsp],xmm15
1367 $L$ctr_enc_body::
             ; rbp marks the frame base.  Copy the caller's counter block to
             ; [32+rbp] and move the arguments into call-surviving registers.
             ; Fewer than eight blocks: skip the key conversion entirely.
1368         mov     rbp,rsp
1369         movdqu  xmm0,XMMWORD PTR[r10]
1370         mov     eax,DWORD PTR[240+r9]
1371         mov     r12,rcx
1372         mov     r13,rdx
1373         mov     r14,r8
1374         mov     r15,r9
1375         movdqa  XMMWORD PTR[32+rbp],xmm0
1376         cmp     r8,8
1377         jb      $L$ctr_enc_short
1378
             ; Reserve rounds*128 - 96 bytes below rbp for the converted key
             ; schedule; ebx keeps the round count for later calls.
1379         mov     ebx,eax
1380         shl     rax,7
1381         sub     rax,96
1382         sub     rsp,rax
1383
             ; _bsaes_key_convert takes rax = destination, rcx = key,
             ; r10d = round count.  When it returns, rax points just past the
             ; last block it wrote, xmm6 holds the last 16 bytes it loaded
             ; from the key and xmm7 the constant it loaded from 80+r11;
             ; their XOR is stored as the final schedule entry.
1384         mov     rax,rsp
1385         mov     rcx,r15
1386         mov     r10d,ebx
1387         call    _bsaes_key_convert
1388         pxor    xmm7,xmm6
1389         movdqa  XMMWORD PTR[rax],xmm7
1390
             ; One-time setup: shuffle both the first schedule entry (xmm8)
             ; and the counter block (xmm15) with the mask stored 32 bytes
             ; before $L$ADD1, and write the shuffled entry back to [rsp].
1391         movdqa  xmm8,XMMWORD PTR[rsp]
1392         lea     r11,QWORD PTR[$L$ADD1]
1393         movdqa  xmm15,XMMWORD PTR[32+rbp]
1394         movdqa  xmm7,XMMWORD PTR[((-32))+r11]
1395 DB      102,68,15,56,0,199              ; pshufb xmm8,xmm7
1396 DB      102,68,15,56,0,255              ; pshufb xmm15,xmm7
1397         movdqa  XMMWORD PTR[rsp],xmm8
1398         jmp     $L$ctr_enc_loop
1399 ALIGN   16
1400 $L$ctr_enc_loop::
             ; Save the current counter, then derive seven more counter
             ; values in xmm0..xmm6 by adding the seven constants that start
             ; at $L$ADD1 (presumably +1 .. +7 -- TODO confirm against the
             ; constant table, which is outside this block).
1401         movdqa  XMMWORD PTR[32+rbp],xmm15
1402         movdqa  xmm0,xmm15
1403         movdqa  xmm1,xmm15
1404         paddd   xmm0,XMMWORD PTR[r11]
1405         movdqa  xmm2,xmm15
1406         paddd   xmm1,XMMWORD PTR[16+r11]
1407         movdqa  xmm3,xmm15
1408         paddd   xmm2,XMMWORD PTR[32+r11]
1409         movdqa  xmm4,xmm15
1410         paddd   xmm3,XMMWORD PTR[48+r11]
1411         movdqa  xmm5,xmm15
1412         paddd   xmm4,XMMWORD PTR[64+r11]
1413         movdqa  xmm6,xmm15
1414         paddd   xmm5,XMMWORD PTR[80+r11]
1415         paddd   xmm6,XMMWORD PTR[96+r11]
1416
1417
1418
             ; Same shape as the preamble of _bsaes_encrypt8 (XOR all eight
             ; blocks with the first schedule entry, then pshufb each one),
             ; but using the mask stored 16 bytes before $L$ADD1.  rax then
             ; points at the second schedule entry and the core is entered
             ; at _bsaes_encrypt8_bitslice, past its own preamble.
1419         movdqa  xmm8,XMMWORD PTR[rsp]
1420         lea     rax,QWORD PTR[16+rsp]
1421         movdqa  xmm7,XMMWORD PTR[((-16))+r11]
1422         pxor    xmm15,xmm8
1423         pxor    xmm0,xmm8
1424         pxor    xmm1,xmm8
1425         pxor    xmm2,xmm8
1426 DB      102,68,15,56,0,255              ; pshufb xmm15,xmm7
1427 DB      102,15,56,0,199                 ; pshufb xmm0,xmm7
1428         pxor    xmm3,xmm8
1429         pxor    xmm4,xmm8
1430 DB      102,15,56,0,207                 ; pshufb xmm1,xmm7
1431 DB      102,15,56,0,215                 ; pshufb xmm2,xmm7
1432         pxor    xmm5,xmm8
1433         pxor    xmm6,xmm8
1434 DB      102,15,56,0,223                 ; pshufb xmm3,xmm7
1435 DB      102,15,56,0,231                 ; pshufb xmm4,xmm7
1436 DB      102,15,56,0,239                 ; pshufb xmm5,xmm7
1437 DB      102,15,56,0,247                 ; pshufb xmm6,xmm7
1438         lea     r11,QWORD PTR[$L$BS0]
1439         mov     r10d,ebx
1440
1441         call    _bsaes_encrypt8_bitslice
1442
             ; A borrow means fewer than eight blocks were left: only part of
             ; this keystream batch is consumed, at $L$ctr_enc_loop_done.
1443         sub     r14,8
1444         jc      $L$ctr_enc_loop_done
1445
             ; Full batch.  Keystream for blocks 0..7 is taken from xmm15,
             ; xmm0, xmm3, xmm5, xmm2, xmm6, xmm1, xmm4.  The saved counter
             ; is reloaded and advanced by the constant at 112 + $L$ADD1
             ; (presumably +8 -- TODO confirm).  None of the instructions up
             ; to the jnz change the flags, so it still tests the result of
             ; "sub r14,8": loop again while blocks remain.
1446         movdqu  xmm7,XMMWORD PTR[r12]
1447         movdqu  xmm8,XMMWORD PTR[16+r12]
1448         movdqu  xmm9,XMMWORD PTR[32+r12]
1449         movdqu  xmm10,XMMWORD PTR[48+r12]
1450         movdqu  xmm11,XMMWORD PTR[64+r12]
1451         movdqu  xmm12,XMMWORD PTR[80+r12]
1452         movdqu  xmm13,XMMWORD PTR[96+r12]
1453         movdqu  xmm14,XMMWORD PTR[112+r12]
1454         lea     r12,QWORD PTR[128+r12]
1455         pxor    xmm7,xmm15
1456         movdqa  xmm15,XMMWORD PTR[32+rbp]
1457         pxor    xmm0,xmm8
1458         movdqu  XMMWORD PTR[r13],xmm7
1459         pxor    xmm3,xmm9
1460         movdqu  XMMWORD PTR[16+r13],xmm0
1461         pxor    xmm5,xmm10
1462         movdqu  XMMWORD PTR[32+r13],xmm3
1463         pxor    xmm2,xmm11
1464         movdqu  XMMWORD PTR[48+r13],xmm5
1465         pxor    xmm6,xmm12
1466         movdqu  XMMWORD PTR[64+r13],xmm2
1467         pxor    xmm1,xmm13
1468         movdqu  XMMWORD PTR[80+r13],xmm6
1469         pxor    xmm4,xmm14
1470         movdqu  XMMWORD PTR[96+r13],xmm1
1471         lea     r11,QWORD PTR[$L$ADD1]
1472         movdqu  XMMWORD PTR[112+r13],xmm4
1473         lea     r13,QWORD PTR[128+r13]
1474         paddd   xmm15,XMMWORD PTR[112+r11]
1475         jnz     $L$ctr_enc_loop
1476
1477         jmp     $L$ctr_enc_done
1478 ALIGN   16
1479 $L$ctr_enc_loop_done::
             ; r14 = 1..7 blocks left.  Blocks are emitted one at a time while
             ; r14 is compared against 2, 4 and 6; each jb/je pair reuses the
             ; flags of one cmp because the SSE moves and XORs in between
             ; leave them alone.
1480         add     r14,8
1481         movdqu  xmm7,XMMWORD PTR[r12]
1482         pxor    xmm15,xmm7
1483         movdqu  XMMWORD PTR[r13],xmm15
1484         cmp     r14,2
1485         jb      $L$ctr_enc_done
1486         movdqu  xmm8,XMMWORD PTR[16+r12]
1487         pxor    xmm0,xmm8
1488         movdqu  XMMWORD PTR[16+r13],xmm0
1489         je      $L$ctr_enc_done
1490         movdqu  xmm9,XMMWORD PTR[32+r12]
1491         pxor    xmm3,xmm9
1492         movdqu  XMMWORD PTR[32+r13],xmm3
1493         cmp     r14,4
1494         jb      $L$ctr_enc_done
1495         movdqu  xmm10,XMMWORD PTR[48+r12]
1496         pxor    xmm5,xmm10
1497         movdqu  XMMWORD PTR[48+r13],xmm5
1498         je      $L$ctr_enc_done
1499         movdqu  xmm11,XMMWORD PTR[64+r12]
1500         pxor    xmm2,xmm11
1501         movdqu  XMMWORD PTR[64+r13],xmm2
1502         cmp     r14,6
1503         jb      $L$ctr_enc_done
1504         movdqu  xmm12,XMMWORD PTR[80+r12]
1505         pxor    xmm6,xmm12
1506         movdqu  XMMWORD PTR[80+r13],xmm6
1507         je      $L$ctr_enc_done
1508         movdqu  xmm13,XMMWORD PTR[96+r12]
1509         pxor    xmm1,xmm13
1510         movdqu  XMMWORD PTR[96+r13],xmm1
1511         jmp     $L$ctr_enc_done
1512
1513 ALIGN   16
1514 $L$ctr_enc_short::
             ; One block per iteration, 1..7 blocks in total (zero was
             ; rejected on entry).  asm_AES_encrypt(rcx = counter copy at
             ; [32+rbp], rdx = keystream at [48+rbp], r8 = original key).
             ; The last four bytes of the counter copy ([44+rbp]) are then
             ; incremented as one big-endian 32-bit integer: bswap, inc, bswap.
1515         lea     rcx,QWORD PTR[32+rbp]
1516         lea     rdx,QWORD PTR[48+rbp]
1517         lea     r8,QWORD PTR[r15]
1518         call    asm_AES_encrypt
1519         movdqu  xmm0,XMMWORD PTR[r12]
1520         lea     r12,QWORD PTR[16+r12]
1521         mov     eax,DWORD PTR[44+rbp]
1522         bswap   eax
1523         pxor    xmm0,XMMWORD PTR[48+rbp]
1524         inc     eax
1525         movdqu  XMMWORD PTR[r13],xmm0
1526         bswap   eax
1527         lea     r13,QWORD PTR[16+r13]
             ; Store through rbp, matching the load above.  No stack space is
             ; allocated on this path, so rsp == rbp and the address is the
             ; same one the previous rsp-based store used.
1528         mov     DWORD PTR[44+rbp],eax
1529         dec     r14
1530         jnz     $L$ctr_enc_short
1531
1532 $L$ctr_enc_done::
             ; Zero everything between rsp and rbp in 32-byte steps.  On the
             ; bulk path that is the converted key schedule.  On the short
             ; path rsp == rbp and the loop body still runs once, clearing
             ; [rbp, rbp+32); the counter copy and keystream block above it
             ; are not cleared.
1533         lea     rax,QWORD PTR[rsp]
1534         pxor    xmm0,xmm0
1535 $L$ctr_enc_bzero::
1536         movdqa  XMMWORD PTR[rax],xmm0
1537         movdqa  XMMWORD PTR[16+rax],xmm0
1538         lea     rax,QWORD PTR[32+rax]
1539         cmp     rbp,rax
1540         ja      $L$ctr_enc_bzero
1541
             ; Drop the key schedule, restore xmm6-xmm15, then reload the six
             ; pushed registers by offset instead of popping them.  The saved
             ; rbp travels through rax and rsp ends at its entry value.
1542         lea     rsp,QWORD PTR[rbp]
1543         movaps  xmm6,XMMWORD PTR[64+rbp]
1544         movaps  xmm7,XMMWORD PTR[80+rbp]
1545         movaps  xmm8,XMMWORD PTR[96+rbp]
1546         movaps  xmm9,XMMWORD PTR[112+rbp]
1547         movaps  xmm10,XMMWORD PTR[128+rbp]
1548         movaps  xmm11,XMMWORD PTR[144+rbp]
1549         movaps  xmm12,XMMWORD PTR[160+rbp]
1550         movaps  xmm13,XMMWORD PTR[176+rbp]
1551         movaps  xmm14,XMMWORD PTR[192+rbp]
1552         movaps  xmm15,XMMWORD PTR[208+rbp]
1553         lea     rsp,QWORD PTR[160+rbp]
1554         mov     r15,QWORD PTR[72+rsp]
1555         mov     r14,QWORD PTR[80+rsp]
1556         mov     r13,QWORD PTR[88+rsp]
1557         mov     r12,QWORD PTR[96+rsp]
1558         mov     rbx,QWORD PTR[104+rsp]
1559         mov     rax,QWORD PTR[112+rsp]
1560         lea     rsp,QWORD PTR[120+rsp]
1561         mov     rbp,rax
1562 $L$ctr_enc_epilogue::
1563         DB      0F3h,0C3h               ;repret
1564 bsaes_ctr32_encrypt_blocks      ENDP
1565 PUBLIC  bsaes_xts_encrypt
1566
1567 ALIGN   16
1568 bsaes_xts_encrypt       PROC PUBLIC
1569         mov     rax,rsp
1570 $L$xts_enc_prologue::
1571         push    rbp
1572         push    rbx
1573         push    r12
1574         push    r13
1575         push    r14
1576         push    r15
1577         lea     rsp,QWORD PTR[((-72))+rsp]
1578         mov     r10,QWORD PTR[160+rsp]
1579         mov     r11,QWORD PTR[168+rsp]
1580         lea     rsp,QWORD PTR[((-160))+rsp]
1581         movaps  XMMWORD PTR[64+rsp],xmm6
1582         movaps  XMMWORD PTR[80+rsp],xmm7
1583         movaps  XMMWORD PTR[96+rsp],xmm8
1584         movaps  XMMWORD PTR[112+rsp],xmm9
1585         movaps  XMMWORD PTR[128+rsp],xmm10
1586         movaps  XMMWORD PTR[144+rsp],xmm11
1587         movaps  XMMWORD PTR[160+rsp],xmm12
1588         movaps  XMMWORD PTR[176+rsp],xmm13
1589         movaps  XMMWORD PTR[192+rsp],xmm14
1590         movaps  XMMWORD PTR[208+rsp],xmm15
1591 $L$xts_enc_body::
1592         mov     rbp,rsp
1593         mov     r12,rcx
1594         mov     r13,rdx
1595         mov     r14,r8
1596         mov     r15,r9
1597
1598         lea     rcx,QWORD PTR[r11]
1599         lea     rdx,QWORD PTR[32+rbp]
1600         lea     r8,QWORD PTR[r10]
1601         call    asm_AES_encrypt
1602
1603         mov     eax,DWORD PTR[240+r15]
1604         mov     rbx,r14
1605
1606         mov     edx,eax
1607         shl     rax,7
1608         sub     rax,96
1609         sub     rsp,rax
1610
1611         mov     rax,rsp
1612         mov     rcx,r15
1613         mov     r10d,edx
1614         call    _bsaes_key_convert
1615         pxor    xmm7,xmm6
1616         movdqa  XMMWORD PTR[rax],xmm7
1617
1618         and     r14,-16
1619         sub     rsp,080h
1620         movdqa  xmm6,XMMWORD PTR[32+rbp]
1621
1622         pxor    xmm14,xmm14
1623         movdqa  xmm12,XMMWORD PTR[$L$xts_magic]
1624         pcmpgtd xmm14,xmm6
1625
1626         sub     r14,080h
1627         jc      $L$xts_enc_short
1628         jmp     $L$xts_enc_loop
1629
1630 ALIGN   16
1631 $L$xts_enc_loop::
1632         pshufd  xmm13,xmm14,013h
1633         pxor    xmm14,xmm14
1634         movdqa  xmm15,xmm6
1635         movdqa  XMMWORD PTR[rsp],xmm6
1636         paddq   xmm6,xmm6
1637         pand    xmm13,xmm12
1638         pcmpgtd xmm14,xmm6
1639         pxor    xmm6,xmm13
1640         pshufd  xmm13,xmm14,013h
1641         pxor    xmm14,xmm14
1642         movdqa  xmm0,xmm6
1643         movdqa  XMMWORD PTR[16+rsp],xmm6
1644         paddq   xmm6,xmm6
1645         pand    xmm13,xmm12
1646         pcmpgtd xmm14,xmm6
1647         pxor    xmm6,xmm13
1648         movdqu  xmm7,XMMWORD PTR[r12]
1649         pshufd  xmm13,xmm14,013h
1650         pxor    xmm14,xmm14
1651         movdqa  xmm1,xmm6
1652         movdqa  XMMWORD PTR[32+rsp],xmm6
1653         paddq   xmm6,xmm6
1654         pand    xmm13,xmm12
1655         pcmpgtd xmm14,xmm6
1656         pxor    xmm6,xmm13
1657         movdqu  xmm8,XMMWORD PTR[16+r12]
1658         pxor    xmm15,xmm7
1659         pshufd  xmm13,xmm14,013h
1660         pxor    xmm14,xmm14
1661         movdqa  xmm2,xmm6
1662         movdqa  XMMWORD PTR[48+rsp],xmm6
1663         paddq   xmm6,xmm6
1664         pand    xmm13,xmm12
1665         pcmpgtd xmm14,xmm6
1666         pxor    xmm6,xmm13
1667         movdqu  xmm9,XMMWORD PTR[32+r12]
1668         pxor    xmm0,xmm8
1669         pshufd  xmm13,xmm14,013h
1670         pxor    xmm14,xmm14
1671         movdqa  xmm3,xmm6
1672         movdqa  XMMWORD PTR[64+rsp],xmm6
1673         paddq   xmm6,xmm6
1674         pand    xmm13,xmm12
1675         pcmpgtd xmm14,xmm6
1676         pxor    xmm6,xmm13
1677         movdqu  xmm10,XMMWORD PTR[48+r12]
1678         pxor    xmm1,xmm9
1679         pshufd  xmm13,xmm14,013h
1680         pxor    xmm14,xmm14
1681         movdqa  xmm4,xmm6
1682         movdqa  XMMWORD PTR[80+rsp],xmm6
1683         paddq   xmm6,xmm6
1684         pand    xmm13,xmm12
1685         pcmpgtd xmm14,xmm6
1686         pxor    xmm6,xmm13
1687         movdqu  xmm11,XMMWORD PTR[64+r12]
1688         pxor    xmm2,xmm10
1689         pshufd  xmm13,xmm14,013h
1690         pxor    xmm14,xmm14
1691         movdqa  xmm5,xmm6
1692         movdqa  XMMWORD PTR[96+rsp],xmm6
1693         paddq   xmm6,xmm6
1694         pand    xmm13,xmm12
1695         pcmpgtd xmm14,xmm6
1696         pxor    xmm6,xmm13
1697         movdqu  xmm12,XMMWORD PTR[80+r12]
1698         pxor    xmm3,xmm11
1699         movdqu  xmm13,XMMWORD PTR[96+r12]
1700         pxor    xmm4,xmm12
1701         movdqu  xmm14,XMMWORD PTR[112+r12]
1702         lea     r12,QWORD PTR[128+r12]
1703         movdqa  XMMWORD PTR[112+rsp],xmm6
1704         pxor    xmm5,xmm13
1705         lea     rax,QWORD PTR[128+rsp]
1706         pxor    xmm6,xmm14
1707         mov     r10d,edx
1708
1709         call    _bsaes_encrypt8
1710
1711         pxor    xmm15,XMMWORD PTR[rsp]
1712         pxor    xmm0,XMMWORD PTR[16+rsp]
1713         movdqu  XMMWORD PTR[r13],xmm15
1714         pxor    xmm3,XMMWORD PTR[32+rsp]
1715         movdqu  XMMWORD PTR[16+r13],xmm0
1716         pxor    xmm5,XMMWORD PTR[48+rsp]
1717         movdqu  XMMWORD PTR[32+r13],xmm3
1718         pxor    xmm2,XMMWORD PTR[64+rsp]
1719         movdqu  XMMWORD PTR[48+r13],xmm5
1720         pxor    xmm6,XMMWORD PTR[80+rsp]
1721         movdqu  XMMWORD PTR[64+r13],xmm2
1722         pxor    xmm1,XMMWORD PTR[96+rsp]
1723         movdqu  XMMWORD PTR[80+r13],xmm6
1724         pxor    xmm4,XMMWORD PTR[112+rsp]
1725         movdqu  XMMWORD PTR[96+r13],xmm1
1726         movdqu  XMMWORD PTR[112+r13],xmm4
1727         lea     r13,QWORD PTR[128+r13]
1728
1729         movdqa  xmm6,XMMWORD PTR[112+rsp]
1730         pxor    xmm14,xmm14
1731         movdqa  xmm12,XMMWORD PTR[$L$xts_magic]
1732         pcmpgtd xmm14,xmm6
1733         pshufd  xmm13,xmm14,013h
1734         pxor    xmm14,xmm14
1735         paddq   xmm6,xmm6
1736         pand    xmm13,xmm12
1737         pcmpgtd xmm14,xmm6
1738         pxor    xmm6,xmm13
1739
1740         sub     r14,080h
1741         jnc     $L$xts_enc_loop
1742
1743 $L$xts_enc_short::
1744         add     r14,080h
1745         jz      $L$xts_enc_done
1746         pshufd  xmm13,xmm14,013h
1747         pxor    xmm14,xmm14
1748         movdqa  xmm15,xmm6
1749         movdqa  XMMWORD PTR[rsp],xmm6
1750         paddq   xmm6,xmm6
1751         pand    xmm13,xmm12
1752         pcmpgtd xmm14,xmm6
1753         pxor    xmm6,xmm13
1754         pshufd  xmm13,xmm14,013h
1755         pxor    xmm14,xmm14
1756         movdqa  xmm0,xmm6
1757         movdqa  XMMWORD PTR[16+rsp],xmm6
1758         paddq   xmm6,xmm6
1759         pand    xmm13,xmm12
1760         pcmpgtd xmm14,xmm6
1761         pxor    xmm6,xmm13
1762         movdqu  xmm7,XMMWORD PTR[r12]
1763         cmp     r14,16
1764         je      $L$xts_enc_1
1765         pshufd  xmm13,xmm14,013h
1766         pxor    xmm14,xmm14
1767         movdqa  xmm1,xmm6
1768         movdqa  XMMWORD PTR[32+rsp],xmm6
1769         paddq   xmm6,xmm6
1770         pand    xmm13,xmm12
1771         pcmpgtd xmm14,xmm6
1772         pxor    xmm6,xmm13
1773         movdqu  xmm8,XMMWORD PTR[16+r12]
1774         cmp     r14,32
1775         je      $L$xts_enc_2
1776         pxor    xmm15,xmm7
1777         pshufd  xmm13,xmm14,013h
1778         pxor    xmm14,xmm14
1779         movdqa  xmm2,xmm6
1780         movdqa  XMMWORD PTR[48+rsp],xmm6
1781         paddq   xmm6,xmm6
1782         pand    xmm13,xmm12
1783         pcmpgtd xmm14,xmm6
1784         pxor    xmm6,xmm13
1785         movdqu  xmm9,XMMWORD PTR[32+r12]
1786         cmp     r14,48
1787         je      $L$xts_enc_3
1788         pxor    xmm0,xmm8
1789         pshufd  xmm13,xmm14,013h
1790         pxor    xmm14,xmm14
1791         movdqa  xmm3,xmm6
1792         movdqa  XMMWORD PTR[64+rsp],xmm6
1793         paddq   xmm6,xmm6
1794         pand    xmm13,xmm12
1795         pcmpgtd xmm14,xmm6
1796         pxor    xmm6,xmm13
1797         movdqu  xmm10,XMMWORD PTR[48+r12]
1798         cmp     r14,64
1799         je      $L$xts_enc_4
1800         pxor    xmm1,xmm9
1801         pshufd  xmm13,xmm14,013h
1802         pxor    xmm14,xmm14
1803         movdqa  xmm4,xmm6
1804         movdqa  XMMWORD PTR[80+rsp],xmm6
1805         paddq   xmm6,xmm6
1806         pand    xmm13,xmm12
1807         pcmpgtd xmm14,xmm6
1808         pxor    xmm6,xmm13
1809         movdqu  xmm11,XMMWORD PTR[64+r12]
1810         cmp     r14,80
1811         je      $L$xts_enc_5
1812         pxor    xmm2,xmm10
1813         pshufd  xmm13,xmm14,013h
1814         pxor    xmm14,xmm14
1815         movdqa  xmm5,xmm6
1816         movdqa  XMMWORD PTR[96+rsp],xmm6
1817         paddq   xmm6,xmm6
1818         pand    xmm13,xmm12
1819         pcmpgtd xmm14,xmm6
1820         pxor    xmm6,xmm13
1821         movdqu  xmm12,XMMWORD PTR[80+r12]
1822         cmp     r14,96
1823         je      $L$xts_enc_6
1824         pxor    xmm3,xmm11
1825         movdqu  xmm13,XMMWORD PTR[96+r12]
1826         pxor    xmm4,xmm12
1827         movdqa  XMMWORD PTR[112+rsp],xmm6
1828         lea     r12,QWORD PTR[112+r12]
1829         pxor    xmm5,xmm13
1830         lea     rax,QWORD PTR[128+rsp]
1831         mov     r10d,edx
1832
1833         call    _bsaes_encrypt8
1834
1835         pxor    xmm15,XMMWORD PTR[rsp]
1836         pxor    xmm0,XMMWORD PTR[16+rsp]
1837         movdqu  XMMWORD PTR[r13],xmm15
1838         pxor    xmm3,XMMWORD PTR[32+rsp]
1839         movdqu  XMMWORD PTR[16+r13],xmm0
1840         pxor    xmm5,XMMWORD PTR[48+rsp]
1841         movdqu  XMMWORD PTR[32+r13],xmm3
1842         pxor    xmm2,XMMWORD PTR[64+rsp]
1843         movdqu  XMMWORD PTR[48+r13],xmm5
1844         pxor    xmm6,XMMWORD PTR[80+rsp]
1845         movdqu  XMMWORD PTR[64+r13],xmm2
1846         pxor    xmm1,XMMWORD PTR[96+rsp]
1847         movdqu  XMMWORD PTR[80+r13],xmm6
1848         movdqu  XMMWORD PTR[96+r13],xmm1
1849         lea     r13,QWORD PTR[112+r13]
1850
1851         movdqa  xmm6,XMMWORD PTR[112+rsp]
1852         jmp     $L$xts_enc_done
1853 ALIGN   16
1854 $L$xts_enc_6::
1855         pxor    xmm3,xmm11
1856         lea     r12,QWORD PTR[96+r12]
1857         pxor    xmm4,xmm12
1858         lea     rax,QWORD PTR[128+rsp]
1859         mov     r10d,edx
1860
1861         call    _bsaes_encrypt8
1862
1863         pxor    xmm15,XMMWORD PTR[rsp]
1864         pxor    xmm0,XMMWORD PTR[16+rsp]
1865         movdqu  XMMWORD PTR[r13],xmm15
1866         pxor    xmm3,XMMWORD PTR[32+rsp]
1867         movdqu  XMMWORD PTR[16+r13],xmm0
1868         pxor    xmm5,XMMWORD PTR[48+rsp]
1869         movdqu  XMMWORD PTR[32+r13],xmm3
1870         pxor    xmm2,XMMWORD PTR[64+rsp]
1871         movdqu  XMMWORD PTR[48+r13],xmm5
1872         pxor    xmm6,XMMWORD PTR[80+rsp]
1873         movdqu  XMMWORD PTR[64+r13],xmm2
1874         movdqu  XMMWORD PTR[80+r13],xmm6
1875         lea     r13,QWORD PTR[96+r13]
1876
1877         movdqa  xmm6,XMMWORD PTR[96+rsp]
1878         jmp     $L$xts_enc_done
1879 ALIGN   16
1880 $L$xts_enc_5::
1881         pxor    xmm2,xmm10
1882         lea     r12,QWORD PTR[80+r12]
1883         pxor    xmm3,xmm11
1884         lea     rax,QWORD PTR[128+rsp]
1885         mov     r10d,edx
1886
1887         call    _bsaes_encrypt8
1888
1889         pxor    xmm15,XMMWORD PTR[rsp]
1890         pxor    xmm0,XMMWORD PTR[16+rsp]
1891         movdqu  XMMWORD PTR[r13],xmm15
1892         pxor    xmm3,XMMWORD PTR[32+rsp]
1893         movdqu  XMMWORD PTR[16+r13],xmm0
1894         pxor    xmm5,XMMWORD PTR[48+rsp]
1895         movdqu  XMMWORD PTR[32+r13],xmm3
1896         pxor    xmm2,XMMWORD PTR[64+rsp]
1897         movdqu  XMMWORD PTR[48+r13],xmm5
1898         movdqu  XMMWORD PTR[64+r13],xmm2
1899         lea     r13,QWORD PTR[80+r13]
1900
1901         movdqa  xmm6,XMMWORD PTR[80+rsp]
1902         jmp     $L$xts_enc_done
1903 ALIGN   16
1904 $L$xts_enc_4::
1905         pxor    xmm1,xmm9
1906         lea     r12,QWORD PTR[64+r12]
1907         pxor    xmm2,xmm10
1908         lea     rax,QWORD PTR[128+rsp]
1909         mov     r10d,edx
1910
1911         call    _bsaes_encrypt8
1912
1913         pxor    xmm15,XMMWORD PTR[rsp]
1914         pxor    xmm0,XMMWORD PTR[16+rsp]
1915         movdqu  XMMWORD PTR[r13],xmm15
1916         pxor    xmm3,XMMWORD PTR[32+rsp]
1917         movdqu  XMMWORD PTR[16+r13],xmm0
1918         pxor    xmm5,XMMWORD PTR[48+rsp]
1919         movdqu  XMMWORD PTR[32+r13],xmm3
1920         movdqu  XMMWORD PTR[48+r13],xmm5
1921         lea     r13,QWORD PTR[64+r13]
1922
1923         movdqa  xmm6,XMMWORD PTR[64+rsp]
1924         jmp     $L$xts_enc_done
1925 ALIGN   16
1926 $L$xts_enc_3::
1927         pxor    xmm0,xmm8
1928         lea     r12,QWORD PTR[48+r12]
1929         pxor    xmm1,xmm9
1930         lea     rax,QWORD PTR[128+rsp]
1931         mov     r10d,edx
1932
1933         call    _bsaes_encrypt8
1934
1935         pxor    xmm15,XMMWORD PTR[rsp]
1936         pxor    xmm0,XMMWORD PTR[16+rsp]
1937         movdqu  XMMWORD PTR[r13],xmm15
1938         pxor    xmm3,XMMWORD PTR[32+rsp]
1939         movdqu  XMMWORD PTR[16+r13],xmm0
1940         movdqu  XMMWORD PTR[32+r13],xmm3
1941         lea     r13,QWORD PTR[48+r13]
1942
1943         movdqa  xmm6,XMMWORD PTR[48+rsp]
1944         jmp     $L$xts_enc_done
1945 ALIGN   16
1946 $L$xts_enc_2::
1947         pxor    xmm15,xmm7
1948         lea     r12,QWORD PTR[32+r12]
1949         pxor    xmm0,xmm8
1950         lea     rax,QWORD PTR[128+rsp]
1951         mov     r10d,edx
1952
1953         call    _bsaes_encrypt8
1954
1955         pxor    xmm15,XMMWORD PTR[rsp]
1956         pxor    xmm0,XMMWORD PTR[16+rsp]
1957         movdqu  XMMWORD PTR[r13],xmm15
1958         movdqu  XMMWORD PTR[16+r13],xmm0
1959         lea     r13,QWORD PTR[32+r13]
1960
1961         movdqa  xmm6,XMMWORD PTR[32+rsp]
1962         jmp     $L$xts_enc_done
1963 ALIGN   16
1964 $L$xts_enc_1::
1965         pxor    xmm7,xmm15
1966         lea     r12,QWORD PTR[16+r12]
1967         movdqa  XMMWORD PTR[32+rbp],xmm7
1968         lea     rcx,QWORD PTR[32+rbp]
1969         lea     rdx,QWORD PTR[32+rbp]
1970         lea     r8,QWORD PTR[r15]
1971         call    asm_AES_encrypt
1972         pxor    xmm15,XMMWORD PTR[32+rbp]
1973
1974
1975
1976
1977
1978         movdqu  XMMWORD PTR[r13],xmm15
1979         lea     r13,QWORD PTR[16+r13]
1980
1981         movdqa  xmm6,XMMWORD PTR[16+rsp]
1982
1983 $L$xts_enc_done::
1984         and     ebx,15
1985         jz      $L$xts_enc_ret
1986         mov     rdx,r13
1987
1988 $L$xts_enc_steal::
1989         movzx   eax,BYTE PTR[r12]
1990         movzx   ecx,BYTE PTR[((-16))+rdx]
1991         lea     r12,QWORD PTR[1+r12]
1992         mov     BYTE PTR[((-16))+rdx],al
1993         mov     BYTE PTR[rdx],cl
1994         lea     rdx,QWORD PTR[1+rdx]
1995         sub     ebx,1
1996         jnz     $L$xts_enc_steal
1997
1998         movdqu  xmm15,XMMWORD PTR[((-16))+r13]
1999         lea     rcx,QWORD PTR[32+rbp]
2000         pxor    xmm15,xmm6
2001         lea     rdx,QWORD PTR[32+rbp]
2002         movdqa  XMMWORD PTR[32+rbp],xmm15
2003         lea     r8,QWORD PTR[r15]
2004         call    asm_AES_encrypt
2005         pxor    xmm6,XMMWORD PTR[32+rbp]
2006         movdqu  XMMWORD PTR[(-16)+r13],xmm6
2007
2008 $L$xts_enc_ret::
2009         lea     rax,QWORD PTR[rsp]
2010         pxor    xmm0,xmm0
2011 $L$xts_enc_bzero::
2012         movdqa  XMMWORD PTR[rax],xmm0
2013         movdqa  XMMWORD PTR[16+rax],xmm0
2014         lea     rax,QWORD PTR[32+rax]
2015         cmp     rbp,rax
2016         ja      $L$xts_enc_bzero
2017
2018         lea     rsp,QWORD PTR[rbp]
2019         movaps  xmm6,XMMWORD PTR[64+rbp]
2020         movaps  xmm7,XMMWORD PTR[80+rbp]
2021         movaps  xmm8,XMMWORD PTR[96+rbp]
2022         movaps  xmm9,XMMWORD PTR[112+rbp]
2023         movaps  xmm10,XMMWORD PTR[128+rbp]
2024         movaps  xmm11,XMMWORD PTR[144+rbp]
2025         movaps  xmm12,XMMWORD PTR[160+rbp]
2026         movaps  xmm13,XMMWORD PTR[176+rbp]
2027         movaps  xmm14,XMMWORD PTR[192+rbp]
2028         movaps  xmm15,XMMWORD PTR[208+rbp]
2029         lea     rsp,QWORD PTR[160+rbp]
2030         mov     r15,QWORD PTR[72+rsp]
2031         mov     r14,QWORD PTR[80+rsp]
2032         mov     r13,QWORD PTR[88+rsp]
2033         mov     r12,QWORD PTR[96+rsp]
2034         mov     rbx,QWORD PTR[104+rsp]
2035         mov     rax,QWORD PTR[112+rsp]
2036         lea     rsp,QWORD PTR[120+rsp]
2037         mov     rbp,rax
2038 $L$xts_enc_epilogue::
2039         DB      0F3h,0C3h               ;repret
2040 bsaes_xts_encrypt       ENDP
2041
2042 PUBLIC  bsaes_xts_decrypt
2043
2044 ALIGN   16
;-----------------------------------------------------------------------
; bsaes_xts_decrypt -- bulk decryption, eight 16-byte blocks per pass
; through _bsaes_decrypt8, each block XORed with a per-block tweak
; before and after, plus a byte-swapping tail for lengths that are not
; a multiple of 16.
;
; Arguments as consumed below (Microsoft x64 register assignment):
;   rcx -> r12   source pointer (read with movdqu, advanced as consumed)
;   rdx -> r13   destination pointer
;   r8  -> r14   byte length (an untouched copy is kept in rbx)
;   r9  -> r15   key handed to _bsaes_key_convert and asm_AES_decrypt;
;                the dword at [240+r15] is used as the round count
;   5th stack argument -> r10, passed in r8 to asm_AES_encrypt
;   6th stack argument -> r11, passed in rcx to asm_AES_encrypt
;   NOTE(review): presumably these are (in, out, len, key1, key2, iv) --
;   confirm against the C prototype.
;
; Frame after the prologue (rbp = frame base):
;   [32+rbp]        16-byte scratch block / initial tweak
;   [64..223+rbp]   saved xmm6-xmm15
;   [232..279+rbp]  saved r15,r14,r13,r12,rbx,rbp
;   below rbp       converted key schedule, then 128 bytes of tweaks
;                   at [rsp..127+rsp]
;-----------------------------------------------------------------------
2045 bsaes_xts_decrypt       PROC PUBLIC
2046         mov     rax,rsp
2047 $L$xts_dec_prologue::
2048         push    rbp
2049         push    rbx
2050         push    r12
2051         push    r13
2052         push    r14
2053         push    r15
2054         lea     rsp,QWORD PTR[((-72))+rsp]
             ; rsp is now entry_rsp-120, so 160/168 address the 5th and
             ; 6th arguments (entry_rsp+40 / entry_rsp+48).
2055         mov     r10,QWORD PTR[160+rsp]
2056         mov     r11,QWORD PTR[168+rsp]
2057         lea     rsp,QWORD PTR[((-160))+rsp]
             ; Save the ten XMM registers this routine overwrites.
2058         movaps  XMMWORD PTR[64+rsp],xmm6
2059         movaps  XMMWORD PTR[80+rsp],xmm7
2060         movaps  XMMWORD PTR[96+rsp],xmm8
2061         movaps  XMMWORD PTR[112+rsp],xmm9
2062         movaps  XMMWORD PTR[128+rsp],xmm10
2063         movaps  XMMWORD PTR[144+rsp],xmm11
2064         movaps  XMMWORD PTR[160+rsp],xmm12
2065         movaps  XMMWORD PTR[176+rsp],xmm13
2066         movaps  XMMWORD PTR[192+rsp],xmm14
2067         movaps  XMMWORD PTR[208+rsp],xmm15
2068 $L$xts_dec_body::
2069         mov     rbp,rsp
2070         mov     r12,rcx
2071         mov     r13,rdx
2072         mov     r14,r8
2073         mov     r15,r9
2074
             ; asm_AES_encrypt(rcx = 6th arg, rdx = &scratch, r8 = 5th arg);
             ; the result at [32+rbp] is used below as the first tweak.
2075         lea     rcx,QWORD PTR[r11]
2076         lea     rdx,QWORD PTR[32+rbp]
2077         lea     r8,QWORD PTR[r10]
2078         call    asm_AES_encrypt
2079
2080         mov     eax,DWORD PTR[240+r15]
2081         mov     rbx,r14
2082
             ; edx keeps the round count for every later call;
             ; reserve rounds*128-96 bytes of stack for the converted key.
2083         mov     edx,eax
2084         shl     rax,7
2085         sub     rax,96
2086         sub     rsp,rax
2087
2088         mov     rax,rsp
2089         mov     rcx,r15
2090         mov     r10d,edx
2091         call    _bsaes_key_convert
             ; Post-conversion fix-up using the xmm6/xmm7/rax values left by
             ; _bsaes_key_convert (defined outside this chunk -- semantics
             ; not visible here): xmm6 goes to the slot rax points at, and
             ; xmm7 XOR first slot replaces the first slot.
2092         pxor    xmm7,XMMWORD PTR[rsp]
2093         movdqa  XMMWORD PTR[rax],xmm6
2094         movdqa  XMMWORD PTR[rsp],xmm7
2095
             ; r14 = length rounded down to whole blocks; when the length
             ; has a partial tail (rbx & 15 != 0) one further full block
             ; is held back for the tail handling at $L$xts_dec_done.
2096         xor     eax,eax
2097         and     r14,-16
2098         test    ebx,15
2099         setnz   al
2100         shl     rax,4
2101         sub     r14,rax
2102
             ; 128 bytes at [rsp] hold the eight tweaks of the current batch;
             ; the key schedule therefore starts at [128+rsp].
2103         sub     rsp,080h
2104         movdqa  xmm6,XMMWORD PTR[32+rbp]
2105
             ; xmm14 = per-dword mask of "top bit set" for the tweak in xmm6
             ; (0 > dword, signed compare); xmm12 = $L$xts_magic.
2106         pxor    xmm14,xmm14
2107         movdqa  xmm12,XMMWORD PTR[$L$xts_magic]
2108         pcmpgtd xmm14,xmm6
2109
2110         sub     r14,080h
2111         jc      $L$xts_dec_short
2112         jmp     $L$xts_dec_loop
2113
2114 ALIGN   16
             ; Main loop: one iteration handles 128 bytes.
             ; Tweak step, repeated eight times below:
             ;   pshufd 013h moves the sign mask of dword 3 into dword 0 and
             ;   that of dword 1 into dword 2; pand with (087h,0,1,0) leaves
             ;   087h / 1 in those positions; paddq shifts each 64-bit half
             ;   left by one; the final pxor carries bit 63 into bit 64 and
             ;   folds bit 127 back in as 087h.
             ; Each tweak is stored at [16*i+rsp] and copied into the
             ; register that receives input block i (xmm15, xmm0..xmm6).
2115 $L$xts_dec_loop::
2116         pshufd  xmm13,xmm14,013h
2117         pxor    xmm14,xmm14
2118         movdqa  xmm15,xmm6
2119         movdqa  XMMWORD PTR[rsp],xmm6
2120         paddq   xmm6,xmm6
2121         pand    xmm13,xmm12
2122         pcmpgtd xmm14,xmm6
2123         pxor    xmm6,xmm13
2124         pshufd  xmm13,xmm14,013h
2125         pxor    xmm14,xmm14
2126         movdqa  xmm0,xmm6
2127         movdqa  XMMWORD PTR[16+rsp],xmm6
2128         paddq   xmm6,xmm6
2129         pand    xmm13,xmm12
2130         pcmpgtd xmm14,xmm6
2131         pxor    xmm6,xmm13
2132         movdqu  xmm7,XMMWORD PTR[r12]
2133         pshufd  xmm13,xmm14,013h
2134         pxor    xmm14,xmm14
2135         movdqa  xmm1,xmm6
2136         movdqa  XMMWORD PTR[32+rsp],xmm6
2137         paddq   xmm6,xmm6
2138         pand    xmm13,xmm12
2139         pcmpgtd xmm14,xmm6
2140         pxor    xmm6,xmm13
2141         movdqu  xmm8,XMMWORD PTR[16+r12]
2142         pxor    xmm15,xmm7
2143         pshufd  xmm13,xmm14,013h
2144         pxor    xmm14,xmm14
2145         movdqa  xmm2,xmm6
2146         movdqa  XMMWORD PTR[48+rsp],xmm6
2147         paddq   xmm6,xmm6
2148         pand    xmm13,xmm12
2149         pcmpgtd xmm14,xmm6
2150         pxor    xmm6,xmm13
2151         movdqu  xmm9,XMMWORD PTR[32+r12]
2152         pxor    xmm0,xmm8
2153         pshufd  xmm13,xmm14,013h
2154         pxor    xmm14,xmm14
2155         movdqa  xmm3,xmm6
2156         movdqa  XMMWORD PTR[64+rsp],xmm6
2157         paddq   xmm6,xmm6
2158         pand    xmm13,xmm12
2159         pcmpgtd xmm14,xmm6
2160         pxor    xmm6,xmm13
2161         movdqu  xmm10,XMMWORD PTR[48+r12]
2162         pxor    xmm1,xmm9
2163         pshufd  xmm13,xmm14,013h
2164         pxor    xmm14,xmm14
2165         movdqa  xmm4,xmm6
2166         movdqa  XMMWORD PTR[80+rsp],xmm6
2167         paddq   xmm6,xmm6
2168         pand    xmm13,xmm12
2169         pcmpgtd xmm14,xmm6
2170         pxor    xmm6,xmm13
2171         movdqu  xmm11,XMMWORD PTR[64+r12]
2172         pxor    xmm2,xmm10
2173         pshufd  xmm13,xmm14,013h
2174         pxor    xmm14,xmm14
2175         movdqa  xmm5,xmm6
2176         movdqa  XMMWORD PTR[96+rsp],xmm6
2177         paddq   xmm6,xmm6
2178         pand    xmm13,xmm12
2179         pcmpgtd xmm14,xmm6
2180         pxor    xmm6,xmm13
             ; From here xmm12..xmm14 are reused for input blocks 5..7, so
             ; the magic constant and sign mask are rebuilt after the call.
2181         movdqu  xmm12,XMMWORD PTR[80+r12]
2182         pxor    xmm3,xmm11
2183         movdqu  xmm13,XMMWORD PTR[96+r12]
2184         pxor    xmm4,xmm12
2185         movdqu  xmm14,XMMWORD PTR[112+r12]
2186         lea     r12,QWORD PTR[128+r12]
2187         movdqa  XMMWORD PTR[112+rsp],xmm6
2188         pxor    xmm5,xmm13
             ; rax = key schedule, r10d = round count for _bsaes_decrypt8.
2189         lea     rax,QWORD PTR[128+rsp]
2190         pxor    xmm6,xmm14
2191         mov     r10d,edx
2192
2193         call    _bsaes_decrypt8
2194
             ; Results are taken in the register order
             ; xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4 for blocks 0..7;
             ; each is XORed with its stored tweak and written out.
2195         pxor    xmm15,XMMWORD PTR[rsp]
2196         pxor    xmm0,XMMWORD PTR[16+rsp]
2197         movdqu  XMMWORD PTR[r13],xmm15
2198         pxor    xmm5,XMMWORD PTR[32+rsp]
2199         movdqu  XMMWORD PTR[16+r13],xmm0
2200         pxor    xmm3,XMMWORD PTR[48+rsp]
2201         movdqu  XMMWORD PTR[32+r13],xmm5
2202         pxor    xmm1,XMMWORD PTR[64+rsp]
2203         movdqu  XMMWORD PTR[48+r13],xmm3
2204         pxor    xmm6,XMMWORD PTR[80+rsp]
2205         movdqu  XMMWORD PTR[64+r13],xmm1
2206         pxor    xmm2,XMMWORD PTR[96+rsp]
2207         movdqu  XMMWORD PTR[80+r13],xmm6
2208         pxor    xmm4,XMMWORD PTR[112+rsp]
2209         movdqu  XMMWORD PTR[96+r13],xmm2
2210         movdqu  XMMWORD PTR[112+r13],xmm4
2211         lea     r13,QWORD PTR[128+r13]
2212
             ; Reload the eighth tweak and advance it once so xmm6 / xmm14 /
             ; xmm12 are ready for the next batch (or for the short path).
2213         movdqa  xmm6,XMMWORD PTR[112+rsp]
2214         pxor    xmm14,xmm14
2215         movdqa  xmm12,XMMWORD PTR[$L$xts_magic]
2216         pcmpgtd xmm14,xmm6
2217         pshufd  xmm13,xmm14,013h
2218         pxor    xmm14,xmm14
2219         paddq   xmm6,xmm6
2220         pand    xmm13,xmm12
2221         pcmpgtd xmm14,xmm6
2222         pxor    xmm6,xmm13
2223
2224         sub     r14,080h
2225         jnc     $L$xts_dec_loop
2226
             ; Fewer than 128 bytes of whole blocks remain. Undo the last
             ; subtraction; r14 is then 0,16,...,112. Tweaks are generated
             ; one at a time, one step ahead of the blocks being loaded,
             ; and the code branches to $L$xts_dec_N as soon as r14 matches.
             ; The fall-through path handles seven blocks.
2227 $L$xts_dec_short::
2228         add     r14,080h
2229         jz      $L$xts_dec_done
2230         pshufd  xmm13,xmm14,013h
2231         pxor    xmm14,xmm14
2232         movdqa  xmm15,xmm6
2233         movdqa  XMMWORD PTR[rsp],xmm6
2234         paddq   xmm6,xmm6
2235         pand    xmm13,xmm12
2236         pcmpgtd xmm14,xmm6
2237         pxor    xmm6,xmm13
2238         pshufd  xmm13,xmm14,013h
2239         pxor    xmm14,xmm14
2240         movdqa  xmm0,xmm6
2241         movdqa  XMMWORD PTR[16+rsp],xmm6
2242         paddq   xmm6,xmm6
2243         pand    xmm13,xmm12
2244         pcmpgtd xmm14,xmm6
2245         pxor    xmm6,xmm13
2246         movdqu  xmm7,XMMWORD PTR[r12]
2247         cmp     r14,16
2248         je      $L$xts_dec_1
2249         pshufd  xmm13,xmm14,013h
2250         pxor    xmm14,xmm14
2251         movdqa  xmm1,xmm6
2252         movdqa  XMMWORD PTR[32+rsp],xmm6
2253         paddq   xmm6,xmm6
2254         pand    xmm13,xmm12
2255         pcmpgtd xmm14,xmm6
2256         pxor    xmm6,xmm13
2257         movdqu  xmm8,XMMWORD PTR[16+r12]
2258         cmp     r14,32
2259         je      $L$xts_dec_2
2260         pxor    xmm15,xmm7
2261         pshufd  xmm13,xmm14,013h
2262         pxor    xmm14,xmm14
2263         movdqa  xmm2,xmm6
2264         movdqa  XMMWORD PTR[48+rsp],xmm6
2265         paddq   xmm6,xmm6
2266         pand    xmm13,xmm12
2267         pcmpgtd xmm14,xmm6
2268         pxor    xmm6,xmm13
2269         movdqu  xmm9,XMMWORD PTR[32+r12]
2270         cmp     r14,48
2271         je      $L$xts_dec_3
2272         pxor    xmm0,xmm8
2273         pshufd  xmm13,xmm14,013h
2274         pxor    xmm14,xmm14
2275         movdqa  xmm3,xmm6
2276         movdqa  XMMWORD PTR[64+rsp],xmm6
2277         paddq   xmm6,xmm6
2278         pand    xmm13,xmm12
2279         pcmpgtd xmm14,xmm6
2280         pxor    xmm6,xmm13
2281         movdqu  xmm10,XMMWORD PTR[48+r12]
2282         cmp     r14,64
2283         je      $L$xts_dec_4
2284         pxor    xmm1,xmm9
2285         pshufd  xmm13,xmm14,013h
2286         pxor    xmm14,xmm14
2287         movdqa  xmm4,xmm6
2288         movdqa  XMMWORD PTR[80+rsp],xmm6
2289         paddq   xmm6,xmm6
2290         pand    xmm13,xmm12
2291         pcmpgtd xmm14,xmm6
2292         pxor    xmm6,xmm13
2293         movdqu  xmm11,XMMWORD PTR[64+r12]
2294         cmp     r14,80
2295         je      $L$xts_dec_5
2296         pxor    xmm2,xmm10
2297         pshufd  xmm13,xmm14,013h
2298         pxor    xmm14,xmm14
2299         movdqa  xmm5,xmm6
2300         movdqa  XMMWORD PTR[96+rsp],xmm6
2301         paddq   xmm6,xmm6
2302         pand    xmm13,xmm12
2303         pcmpgtd xmm14,xmm6
2304         pxor    xmm6,xmm13
2305         movdqu  xmm12,XMMWORD PTR[80+r12]
2306         cmp     r14,96
2307         je      $L$xts_dec_6
             ; Seven whole blocks: finish the last two XORs, park the
             ; eighth (unused) tweak at [112+rsp] and decrypt.
2308         pxor    xmm3,xmm11
2309         movdqu  xmm13,XMMWORD PTR[96+r12]
2310         pxor    xmm4,xmm12
2311         movdqa  XMMWORD PTR[112+rsp],xmm6
2312         lea     r12,QWORD PTR[112+r12]
2313         pxor    xmm5,xmm13
2314         lea     rax,QWORD PTR[128+rsp]
2315         mov     r10d,edx
2316
2317         call    _bsaes_decrypt8
2318
2319         pxor    xmm15,XMMWORD PTR[rsp]
2320         pxor    xmm0,XMMWORD PTR[16+rsp]
2321         movdqu  XMMWORD PTR[r13],xmm15
2322         pxor    xmm5,XMMWORD PTR[32+rsp]
2323         movdqu  XMMWORD PTR[16+r13],xmm0
2324         pxor    xmm3,XMMWORD PTR[48+rsp]
2325         movdqu  XMMWORD PTR[32+r13],xmm5
2326         pxor    xmm1,XMMWORD PTR[64+rsp]
2327         movdqu  XMMWORD PTR[48+r13],xmm3
2328         pxor    xmm6,XMMWORD PTR[80+rsp]
2329         movdqu  XMMWORD PTR[64+r13],xmm1
2330         pxor    xmm2,XMMWORD PTR[96+rsp]
2331         movdqu  XMMWORD PTR[80+r13],xmm6
2332         movdqu  XMMWORD PTR[96+r13],xmm2
2333         lea     r13,QWORD PTR[112+r13]
2334
             ; xmm6 = first tweak not yet consumed (same pattern in every
             ; $L$xts_dec_N tail below).
2335         movdqa  xmm6,XMMWORD PTR[112+rsp]
2336         jmp     $L$xts_dec_done
2337 ALIGN   16
             ; Six whole blocks.
2338 $L$xts_dec_6::
2339         pxor    xmm3,xmm11
2340         lea     r12,QWORD PTR[96+r12]
2341         pxor    xmm4,xmm12
2342         lea     rax,QWORD PTR[128+rsp]
2343         mov     r10d,edx
2344
2345         call    _bsaes_decrypt8
2346
2347         pxor    xmm15,XMMWORD PTR[rsp]
2348         pxor    xmm0,XMMWORD PTR[16+rsp]
2349         movdqu  XMMWORD PTR[r13],xmm15
2350         pxor    xmm5,XMMWORD PTR[32+rsp]
2351         movdqu  XMMWORD PTR[16+r13],xmm0
2352         pxor    xmm3,XMMWORD PTR[48+rsp]
2353         movdqu  XMMWORD PTR[32+r13],xmm5
2354         pxor    xmm1,XMMWORD PTR[64+rsp]
2355         movdqu  XMMWORD PTR[48+r13],xmm3
2356         pxor    xmm6,XMMWORD PTR[80+rsp]
2357         movdqu  XMMWORD PTR[64+r13],xmm1
2358         movdqu  XMMWORD PTR[80+r13],xmm6
2359         lea     r13,QWORD PTR[96+r13]
2360
2361         movdqa  xmm6,XMMWORD PTR[96+rsp]
2362         jmp     $L$xts_dec_done
2363 ALIGN   16
             ; Five whole blocks.
2364 $L$xts_dec_5::
2365         pxor    xmm2,xmm10
2366         lea     r12,QWORD PTR[80+r12]
2367         pxor    xmm3,xmm11
2368         lea     rax,QWORD PTR[128+rsp]
2369         mov     r10d,edx
2370
2371         call    _bsaes_decrypt8
2372
2373         pxor    xmm15,XMMWORD PTR[rsp]
2374         pxor    xmm0,XMMWORD PTR[16+rsp]
2375         movdqu  XMMWORD PTR[r13],xmm15
2376         pxor    xmm5,XMMWORD PTR[32+rsp]
2377         movdqu  XMMWORD PTR[16+r13],xmm0
2378         pxor    xmm3,XMMWORD PTR[48+rsp]
2379         movdqu  XMMWORD PTR[32+r13],xmm5
2380         pxor    xmm1,XMMWORD PTR[64+rsp]
2381         movdqu  XMMWORD PTR[48+r13],xmm3
2382         movdqu  XMMWORD PTR[64+r13],xmm1
2383         lea     r13,QWORD PTR[80+r13]
2384
2385         movdqa  xmm6,XMMWORD PTR[80+rsp]
2386         jmp     $L$xts_dec_done
2387 ALIGN   16
             ; Four whole blocks.
2388 $L$xts_dec_4::
2389         pxor    xmm1,xmm9
2390         lea     r12,QWORD PTR[64+r12]
2391         pxor    xmm2,xmm10
2392         lea     rax,QWORD PTR[128+rsp]
2393         mov     r10d,edx
2394
2395         call    _bsaes_decrypt8
2396
2397         pxor    xmm15,XMMWORD PTR[rsp]
2398         pxor    xmm0,XMMWORD PTR[16+rsp]
2399         movdqu  XMMWORD PTR[r13],xmm15
2400         pxor    xmm5,XMMWORD PTR[32+rsp]
2401         movdqu  XMMWORD PTR[16+r13],xmm0
2402         pxor    xmm3,XMMWORD PTR[48+rsp]
2403         movdqu  XMMWORD PTR[32+r13],xmm5
2404         movdqu  XMMWORD PTR[48+r13],xmm3
2405         lea     r13,QWORD PTR[64+r13]
2406
2407         movdqa  xmm6,XMMWORD PTR[64+rsp]
2408         jmp     $L$xts_dec_done
2409 ALIGN   16
             ; Three whole blocks.
2410 $L$xts_dec_3::
2411         pxor    xmm0,xmm8
2412         lea     r12,QWORD PTR[48+r12]
2413         pxor    xmm1,xmm9
2414         lea     rax,QWORD PTR[128+rsp]
2415         mov     r10d,edx
2416
2417         call    _bsaes_decrypt8
2418
2419         pxor    xmm15,XMMWORD PTR[rsp]
2420         pxor    xmm0,XMMWORD PTR[16+rsp]
2421         movdqu  XMMWORD PTR[r13],xmm15
2422         pxor    xmm5,XMMWORD PTR[32+rsp]
2423         movdqu  XMMWORD PTR[16+r13],xmm0
2424         movdqu  XMMWORD PTR[32+r13],xmm5
2425         lea     r13,QWORD PTR[48+r13]
2426
2427         movdqa  xmm6,XMMWORD PTR[48+rsp]
2428         jmp     $L$xts_dec_done
2429 ALIGN   16
             ; Two whole blocks (neither has been XORed with its tweak yet).
2430 $L$xts_dec_2::
2431         pxor    xmm15,xmm7
2432         lea     r12,QWORD PTR[32+r12]
2433         pxor    xmm0,xmm8
2434         lea     rax,QWORD PTR[128+rsp]
2435         mov     r10d,edx
2436
2437         call    _bsaes_decrypt8
2438
2439         pxor    xmm15,XMMWORD PTR[rsp]
2440         pxor    xmm0,XMMWORD PTR[16+rsp]
2441         movdqu  XMMWORD PTR[r13],xmm15
2442         movdqu  XMMWORD PTR[16+r13],xmm0
2443         lea     r13,QWORD PTR[32+r13]
2444
2445         movdqa  xmm6,XMMWORD PTR[32+rsp]
2446         jmp     $L$xts_dec_done
2447 ALIGN   16
             ; One whole block: bypass the 8-way routine. The block XOR tweak
             ; is placed in the scratch slot and asm_AES_decrypt is called
             ; with rcx = rdx = &scratch and r8 = r15; the result is XORed
             ; with the same tweak (still in xmm15) and stored.
2448 $L$xts_dec_1::
2449         pxor    xmm7,xmm15
2450         lea     r12,QWORD PTR[16+r12]
2451         movdqa  XMMWORD PTR[32+rbp],xmm7
2452         lea     rcx,QWORD PTR[32+rbp]
2453         lea     rdx,QWORD PTR[32+rbp]
2454         lea     r8,QWORD PTR[r15]
2455         call    asm_AES_decrypt
2456         pxor    xmm15,XMMWORD PTR[32+rbp]
2457
2458
2459
2460
2461
2462         movdqu  XMMWORD PTR[r13],xmm15
2463         lea     r13,QWORD PTR[16+r13]
2464
2465         movdqa  xmm6,XMMWORD PTR[16+rsp]
2466
             ; All whole blocks that were not held back are done; xmm6 is the
             ; next tweak. If the original length (rbx) is a multiple of 16
             ; there is nothing left to do.
2467 $L$xts_dec_done::
2468         and     ebx,15
2469         jz      $L$xts_dec_ret
2470
             ; Partial tail. xmm5 keeps the current tweak, xmm6 is advanced
             ; by one step. The held-back full block at [r12] is decrypted
             ; with the *advanced* tweak first and written to [r13].
             ; NOTE(review): xmm5 stays live across the two asm_AES_decrypt
             ; calls below; that relies on asm_AES_decrypt not modifying it,
             ; which is not visible in this chunk -- confirm.
2471         pxor    xmm14,xmm14
2472         movdqa  xmm12,XMMWORD PTR[$L$xts_magic]
2473         pcmpgtd xmm14,xmm6
2474         pshufd  xmm13,xmm14,013h
2475         movdqa  xmm5,xmm6
2476         paddq   xmm6,xmm6
2477         pand    xmm13,xmm12
2478         movdqu  xmm15,XMMWORD PTR[r12]
2479         pxor    xmm6,xmm13
2480
2481         lea     rcx,QWORD PTR[32+rbp]
2482         pxor    xmm15,xmm6
2483         lea     rdx,QWORD PTR[32+rbp]
2484         movdqa  XMMWORD PTR[32+rbp],xmm15
2485         lea     r8,QWORD PTR[r15]
2486         call    asm_AES_decrypt
2487         pxor    xmm6,XMMWORD PTR[32+rbp]
2488         mov     rdx,r13
2489         movdqu  XMMWORD PTR[r13],xmm6
2490
             ; Byte swap, ebx (= length & 15) iterations: the trailing input
             ; byte in[16+i] replaces out[i], and the displaced out[i]
             ; becomes out[16+i].
2491 $L$xts_dec_steal::
2492         movzx   eax,BYTE PTR[16+r12]
2493         movzx   ecx,BYTE PTR[rdx]
2494         lea     r12,QWORD PTR[1+r12]
2495         mov     BYTE PTR[rdx],al
2496         mov     BYTE PTR[16+rdx],cl
2497         lea     rdx,QWORD PTR[1+rdx]
2498         sub     ebx,1
2499         jnz     $L$xts_dec_steal
2500
             ; The reassembled block at [r13] is decrypted with the earlier
             ; tweak kept in xmm5 and written back in place.
2501         movdqu  xmm15,XMMWORD PTR[r13]
2502         lea     rcx,QWORD PTR[32+rbp]
2503         pxor    xmm15,xmm5
2504         lea     rdx,QWORD PTR[32+rbp]
2505         movdqa  XMMWORD PTR[32+rbp],xmm15
2506         lea     r8,QWORD PTR[r15]
2507         call    asm_AES_decrypt
2508         pxor    xmm5,XMMWORD PTR[32+rbp]
2509         movdqu  XMMWORD PTR[r13],xmm5
2510
             ; Zero everything between rsp and rbp (tweak buffer and the
             ; converted key schedule), 32 bytes per iteration. The scratch
             ; block at [32+rbp] lies above rbp and is not touched by this
             ; loop.
2511 $L$xts_dec_ret::
2512         lea     rax,QWORD PTR[rsp]
2513         pxor    xmm0,xmm0
2514 $L$xts_dec_bzero::
2515         movdqa  XMMWORD PTR[rax],xmm0
2516         movdqa  XMMWORD PTR[16+rax],xmm0
2517         lea     rax,QWORD PTR[32+rax]
2518         cmp     rbp,rax
2519         ja      $L$xts_dec_bzero
2520
             ; Epilogue: mirror of the prologue. se_handler in this file
             ; replays the same offsets (64 for the XMM area, then
             ; 72..112 after adding 160), so the two must stay in sync.
2521         lea     rsp,QWORD PTR[rbp]
2522         movaps  xmm6,XMMWORD PTR[64+rbp]
2523         movaps  xmm7,XMMWORD PTR[80+rbp]
2524         movaps  xmm8,XMMWORD PTR[96+rbp]
2525         movaps  xmm9,XMMWORD PTR[112+rbp]
2526         movaps  xmm10,XMMWORD PTR[128+rbp]
2527         movaps  xmm11,XMMWORD PTR[144+rbp]
2528         movaps  xmm12,XMMWORD PTR[160+rbp]
2529         movaps  xmm13,XMMWORD PTR[176+rbp]
2530         movaps  xmm14,XMMWORD PTR[192+rbp]
2531         movaps  xmm15,XMMWORD PTR[208+rbp]
2532         lea     rsp,QWORD PTR[160+rbp]
2533         mov     r15,QWORD PTR[72+rsp]
2534         mov     r14,QWORD PTR[80+rsp]
2535         mov     r13,QWORD PTR[88+rsp]
2536         mov     r12,QWORD PTR[96+rsp]
2537         mov     rbx,QWORD PTR[104+rsp]
             ; Saved rbp travels through rax so rsp can be fixed up first.
2538         mov     rax,QWORD PTR[112+rsp]
2539         lea     rsp,QWORD PTR[120+rsp]
2540         mov     rbp,rax
2541 $L$xts_dec_epilogue::
2542         DB      0F3h,0C3h               ;repret
2543 bsaes_xts_decrypt       ENDP
2544
2545 ALIGN   64
;-----------------------------------------------------------------------
; _bsaes_const -- read-only 16-byte constants used by the routines in
; this file. The order of the entries is significant: _bsaes_encrypt8
; loads r11 = $L$BS0 and then reads [r11], [16+r11], [32+r11] and
; [80+r11], i.e. it reaches $L$BS1, $L$BS2 and $L$M0SR by fixed
; displacement rather than by name. Do not insert or reorder entries.
;-----------------------------------------------------------------------
2546 _bsaes_const::
; Byte-index vectors (the encrypt path applies them with the
; DB-encoded 66 0F 38 00 instruction, i.e. pshufb).
2547 $L$M0ISR::
2548         DQ      00a0e0206070b0f03h,00004080c0d010509h
2549 $L$ISRM0::
2550         DQ      001040b0e0205080fh,00306090c00070a0dh
2551 $L$ISR::
2552         DQ      00504070602010003h,00f0e0d0c080b0a09h
; Bit masks selecting every other bit / bit pair / nibble; used by the
; shift-and-swap sequence at _bsaes_encrypt8_bitslice.
2553 $L$BS0::
2554         DQ      05555555555555555h,05555555555555555h
2555 $L$BS1::
2556         DQ      03333333333333333h,03333333333333333h
2557 $L$BS2::
2558         DQ      00f0f0f0f0f0f0f0fh,00f0f0f0f0f0f0f0fh
2559 $L$SR::
2560         DQ      00504070600030201h,00f0e0d0c0a09080bh
2561 $L$SRM0::
2562         DQ      00304090e00050a0fh,001060b0c0207080dh
2563 $L$M0SR::
2564         DQ      00a0e02060f03070bh,00004080c05090d01h
2565 $L$SWPUP::
2566         DQ      00706050403020100h,00c0d0e0f0b0a0908h
2567 $L$SWPUPM0SR::
2568         DQ      00a0d02060c03070bh,00004080f05090e01h
; $L$ADD1..$L$ADD8: the values 1..8 placed in the most significant
; dword of an otherwise zero 128-bit word.
; NOTE(review): presumably counter increments for the CTR routine,
; which is outside this chunk -- confirm.
2569 $L$ADD1::
2570         DQ      00000000000000000h,00000000100000000h
2571 $L$ADD2::
2572         DQ      00000000000000000h,00000000200000000h
2573 $L$ADD3::
2574         DQ      00000000000000000h,00000000300000000h
2575 $L$ADD4::
2576         DQ      00000000000000000h,00000000400000000h
2577 $L$ADD5::
2578         DQ      00000000000000000h,00000000500000000h
2579 $L$ADD6::
2580         DQ      00000000000000000h,00000000600000000h
2581 $L$ADD7::
2582         DQ      00000000000000000h,00000000700000000h
2583 $L$ADD8::
2584         DQ      00000000000000000h,00000000800000000h
; Dwords (087h, 0, 1, 0): ANDed with the shuffled sign mask in the XTS
; tweak update so that 087h lands in dword 0 and 1 in dword 2.
2585 $L$xts_magic::
2586         DD      087h,0,1,0
; Four byte-replicated single-bit masks (01h, 02h, 04h, 08h).
2587 $L$masks::
2588         DQ      00101010101010101h,00101010101010101h
2589         DQ      00202020202020202h,00202020202020202h
2590         DQ      00404040404040404h,00404040404040404h
2591         DQ      00808080808080808h,00808080808080808h
2592 $L$M0::
2593         DQ      002060a0e03070b0fh,00004080c0105090dh
2594 $L$63::
2595         DQ      06363636363636363h,06363636363636363h
; NUL-terminated ASCII/UTF-8 credit string:
; "Bit-sliced AES for x86_64/SSSE3, Emilia K\xC3\xA4sper, Peter Schwabe,
;  Andy Polyakov"
2596 DB      66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
2597 DB      111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
2598 DB      32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
2599 DB      32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
2600 DB      65,110,100,121,32,80,111,108,121,97,107,111,118,0
2601 ALIGN   64
2602
2603 EXTERN  __imp_RtlVirtualUnwind:NEAR
2604
2605 ALIGN   16
;-----------------------------------------------------------------------
; se_handler -- exception handler named by every unwind-info record in
; the .xdata segment of this file.
;
; Inputs used: r8 and r9 only.
;   NOTE(review): the offsets below match the Win64 language-specific
;   handler contract (r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*); the
;   field names in the comments are taken from that layout and are not
;   visible in this file -- confirm against winnt.h.
;
; What it does: decides from the faulting instruction address whether
; the interrupted routine had completed its prologue; if so, it recovers
; the saved xmm6-xmm15 and rbx/rbp/r12-r15 from that routine's frame
; into the context record; then it stores the recovered stack pointer,
; copies the context record and calls RtlVirtualUnwind. Returns eax = 1.
;-----------------------------------------------------------------------
2606 se_handler      PROC PRIVATE
2607         push    rsi
2608         push    rdi
2609         push    rbx
2610         push    rbp
2611         push    r12
2612         push    r13
2613         push    r14
2614         push    r15
2615         pushfq
2616         sub     rsp,64
2617
             ; rax = context qword at +120 (presumably Rax), rbx = context
             ; qword at +248 (presumably Rip). The routines registered in
             ; .pdata execute "mov rax,rsp" before their first push, so
             ; this rax is the caller's rsp while still in the prologue.
2618         mov     rax,QWORD PTR[120+r8]
2619         mov     rbx,QWORD PTR[248+r8]
2620
             ; rsi = [8+r9] (presumably ImageBase), r11 = [56+r9]
             ; (presumably HandlerData, i.e. the two RVAs that follow the
             ; handler RVA in each .xdata record).
2621         mov     rsi,QWORD PTR[8+r9]
2622         mov     r11,QWORD PTR[56+r9]
2623
             ; First RVA = *_body label: below it the frame is not set up.
2624         mov     r10d,DWORD PTR[r11]
2625         lea     r10,QWORD PTR[r10*1+rsi]
2626         cmp     rbx,r10
2627         jb      $L$in_prologue
2628
             ; rax = context qword at +152 (presumably Rsp).
2629         mov     rax,QWORD PTR[152+r8]
2630
             ; Second RVA = *_epilogue label: at or past it everything has
             ; already been restored, so the current rsp is used as is.
2631         mov     r10d,DWORD PTR[4+r11]
2632         lea     r10,QWORD PTR[r10*1+rsi]
2633         cmp     rbx,r10
2634         jae     $L$in_prologue
2635
             ; Inside the body: rax = context qword at +160 (presumably
             ; Rbp), which the routines use as their frame base.
2636         mov     rax,QWORD PTR[160+r8]
2637
             ; Copy 20 qwords (160 bytes = ten XMM registers) from
             ; [64+frame] to context+512 (presumably Xmm6).
             ; DD 0a548f3fch is the byte sequence FC F3 48 A5 =
             ; cld ; rep movsq.
2638         lea     rsi,QWORD PTR[64+rax]
2639         lea     rdi,QWORD PTR[512+r8]
2640         mov     ecx,20
2641         DD      0a548f3fch
2642         lea     rax,QWORD PTR[160+rax]
2643
             ; Same offsets as the routine epilogues: saved GPRs at
             ; 72..112, caller's rsp = rax+120.
2644         mov     rbp,QWORD PTR[112+rax]
2645         mov     rbx,QWORD PTR[104+rax]
2646         mov     r12,QWORD PTR[96+rax]
2647         mov     r13,QWORD PTR[88+rax]
2648         mov     r14,QWORD PTR[80+rax]
2649         mov     r15,QWORD PTR[72+rax]
2650         lea     rax,QWORD PTR[120+rax]
             ; Write the recovered values into the context record
             ; (presumably Rbx, Rbp, R12..R15 at 144, 160, 216..240).
2651         mov     QWORD PTR[144+r8],rbx
2652         mov     QWORD PTR[160+r8],rbp
2653         mov     QWORD PTR[216+r8],r12
2654         mov     QWORD PTR[224+r8],r13
2655         mov     QWORD PTR[232+r8],r14
2656         mov     QWORD PTR[240+r8],r15
2657
2658 $L$in_prologue::
             ; Store the recovered stack pointer at context+152.
2659         mov     QWORD PTR[152+r8],rax
2660
             ; Copy 154 qwords (1232 bytes) from the context at r8 to the
             ; buffer at [40+r9] (presumably DispatcherContext's
             ; ContextRecord); encoded cld ; rep movsq as above.
2661         mov     rdi,QWORD PTR[40+r9]
2662         mov     rsi,r8
2663         mov     ecx,154
2664         DD      0a548f3fch
2665
             ; Call RtlVirtualUnwind: rcx = 0, rdx = [8+r9], r8 = [r9],
             ; r9 = [16+r9]; stack arguments at [32+rsp].. are [40+r9],
             ; &[56+r9], &[24+r9] and 0 (r9 here is the entry value, held
             ; in rsi). NOTE(review): presumably (UNW_FLAG_NHANDLER,
             ; ImageBase, ControlPc, FunctionEntry, ContextRecord,
             ; &HandlerData, &EstablisherFrame, NULL) -- confirm.
2666         mov     rsi,r9
2667         xor     rcx,rcx
2668         mov     rdx,QWORD PTR[8+rsi]
2669         mov     r8,QWORD PTR[rsi]
2670         mov     r9,QWORD PTR[16+rsi]
2671         mov     r10,QWORD PTR[40+rsi]
2672         lea     r11,QWORD PTR[56+rsi]
2673         lea     r12,QWORD PTR[24+rsi]
2674         mov     QWORD PTR[32+rsp],r10
2675         mov     QWORD PTR[40+rsp],r11
2676         mov     QWORD PTR[48+rsp],r12
2677         mov     QWORD PTR[56+rsp],rcx
2678         call    QWORD PTR[__imp_RtlVirtualUnwind]
2679
             ; Return value 1 (presumably ExceptionContinueSearch), then
             ; restore everything pushed on entry in reverse order.
2680         mov     eax,1
2681         add     rsp,64
2682         popfq
2683         pop     r15
2684         pop     r14
2685         pop     r13
2686         pop     r12
2687         pop     rbp
2688         pop     rbx
2689         pop     rdi
2690         pop     rsi
2691         DB      0F3h,0C3h               ;repret
2692 se_handler      ENDP
2693
2694 .text$  ENDS
; .pdata -- one three-dword record per routine covered by se_handler:
; image-relative address of the *_prologue label, of the *_epilogue
; label, and of the matching *_info record in .xdata.
; NOTE(review): this is presumably the Win64 RUNTIME_FUNCTION layout
; (BeginAddress, EndAddress, UnwindInfoAddress) -- confirm.
2695 .pdata  SEGMENT READONLY ALIGN(4)
2696 ALIGN   4
2697         DD      imagerel $L$cbc_dec_prologue
2698         DD      imagerel $L$cbc_dec_epilogue
2699         DD      imagerel $L$cbc_dec_info
2700
2701         DD      imagerel $L$ctr_enc_prologue
2702         DD      imagerel $L$ctr_enc_epilogue
2703         DD      imagerel $L$ctr_enc_info
2704
2705         DD      imagerel $L$xts_enc_prologue
2706         DD      imagerel $L$xts_enc_epilogue
2707         DD      imagerel $L$xts_enc_info
2708
2709         DD      imagerel $L$xts_dec_prologue
2710         DD      imagerel $L$xts_dec_epilogue
2711         DD      imagerel $L$xts_dec_info
2712
2713 .pdata  ENDS
; .xdata -- one unwind-info record per routine listed in .pdata.
; Each record is: four header bytes 9,0,0,0; the image-relative address
; of se_handler; then two image-relative addresses (*_body label,
; *_epilogue label). se_handler reads that pair as the dwords at
; offsets 0 and 4 of the pointer it loads from [56+r9] and compares the
; faulting address against them.
; NOTE(review): header byte 9 is presumably UNWIND_INFO version 1 with
; the exception-handler flag set and no unwind codes -- confirm.
2714 .xdata  SEGMENT READONLY ALIGN(8)
2715 ALIGN   8
2716 $L$cbc_dec_info::
2717 DB      9,0,0,0
2718         DD      imagerel se_handler
2719         DD      imagerel $L$cbc_dec_body,imagerel $L$cbc_dec_epilogue
2720 $L$ctr_enc_info::
2721 DB      9,0,0,0
2722         DD      imagerel se_handler
2723         DD      imagerel $L$ctr_enc_body,imagerel $L$ctr_enc_epilogue
2724 $L$xts_enc_info::
2725 DB      9,0,0,0
2726         DD      imagerel se_handler
2727         DD      imagerel $L$xts_enc_body,imagerel $L$xts_enc_epilogue
2728 $L$xts_dec_info::
2729 DB      9,0,0,0
2730         DD      imagerel se_handler
2731         DD      imagerel $L$xts_dec_body,imagerel $L$xts_dec_epilogue
2732
2733 .xdata  ENDS
2734 END