Update To 11.40.268.0
[platform/framework/web/crosswalk.git] / src / third_party / boringssl / win-x86_64 / crypto / aes / bsaes-x86_64.asm
; Assembler set-up for the routines below (NASM syntax).
1 default rel                 ; register-less memory operands such as [$L$BS0] are RIP-relative
; The three size keywords are defined as empty macros, so an operand written
; as XMMWORD[...] expands to a plain [...] memory reference.
2 %define XMMWORD
3 %define YMMWORD
4 %define ZMMWORD
5 section .text code align=64
6
7
8 EXTERN  asm_AES_encrypt
9 EXTERN  asm_AES_decrypt
10
11
; _bsaes_encrypt8 -- entry of the eight-register encryption routine.
;
; In:   rax            pointer to 16-byte-aligned data (read with movdqa);
;                      presumably the converted key schedule -- confirm with callers
;       xmm15,xmm0-6   the eight 128-bit state registers
;       r10d           loop counter consumed further down (not touched here)
; Here: r11 = address of $L$BS0 (constant table defined outside this view),
;       xmm8 = first 16 bytes at rax, rax advanced by 16,
;       xmm7 = shuffle mask loaded from $L$BS0+80.
;       Every state register is XORed with xmm8 and then byte-shuffled by xmm7.
; The DB sequences are hand-encoded SSSE3 pshufb instructions.
; Execution falls through into _bsaes_encrypt8_bitslice.
12 ALIGN   64
13 _bsaes_encrypt8:
14         lea     r11,[$L$BS0]
15
16         movdqa  xmm8,XMMWORD[rax]
17         lea     rax,[16+rax]
18         movdqa  xmm7,XMMWORD[80+r11]
19         pxor    xmm15,xmm8
20         pxor    xmm0,xmm8
21         pxor    xmm1,xmm8
22         pxor    xmm2,xmm8
23 DB      102,68,15,56,0,255       ; pshufb xmm15,xmm7
24 DB      102,15,56,0,199          ; pshufb xmm0,xmm7
25         pxor    xmm3,xmm8
26         pxor    xmm4,xmm8
27 DB      102,15,56,0,207          ; pshufb xmm1,xmm7
28 DB      102,15,56,0,215          ; pshufb xmm2,xmm7
29         pxor    xmm5,xmm8
30         pxor    xmm6,xmm8
31 DB      102,15,56,0,223          ; pshufb xmm3,xmm7
32 DB      102,15,56,0,231          ; pshufb xmm4,xmm7
33 DB      102,15,56,0,239          ; pshufb xmm5,xmm7
34 DB      102,15,56,0,247          ; pshufb xmm6,xmm7
; _bsaes_encrypt8_bitslice -- mask-and-shift swap network over the eight
; state registers (xmm15, xmm0-xmm6), presumably the conversion into
; bit-sliced form that the label name suggests.
;
; Every step has the same shape for a register pair (a,b), shift n, mask m:
;       t = ((a >> n) ^ b) & m ;  b ^= t ;  a ^= (t << n)
; Two pairs are interleaved per step; xmm9/xmm10 keep the unshifted copies.
; Shift 1 uses the mask at $L$BS0+0, shift 2 the mask at $L$BS0+16 and
; shift 4 the mask at $L$BS0+32 (table contents are outside this view).
; Expects r11 = $L$BS0.  Ends by decrementing r10d and jumping to $L$enc_sbox.
35 _bsaes_encrypt8_bitslice:
36         movdqa  xmm7,XMMWORD[r11]
37         movdqa  xmm8,XMMWORD[16+r11]
; shift 1: pairs (xmm5,xmm6) and (xmm3,xmm4)
38         movdqa  xmm9,xmm5
39         psrlq   xmm5,1
40         movdqa  xmm10,xmm3
41         psrlq   xmm3,1
42         pxor    xmm5,xmm6
43         pxor    xmm3,xmm4
44         pand    xmm5,xmm7
45         pand    xmm3,xmm7
46         pxor    xmm6,xmm5
47         psllq   xmm5,1
48         pxor    xmm4,xmm3
49         psllq   xmm3,1
50         pxor    xmm5,xmm9
51         pxor    xmm3,xmm10
; shift 1: pairs (xmm1,xmm2) and (xmm15,xmm0)
52         movdqa  xmm9,xmm1
53         psrlq   xmm1,1
54         movdqa  xmm10,xmm15
55         psrlq   xmm15,1
56         pxor    xmm1,xmm2
57         pxor    xmm15,xmm0
58         pand    xmm1,xmm7
59         pand    xmm15,xmm7
60         pxor    xmm2,xmm1
61         psllq   xmm1,1
62         pxor    xmm0,xmm15
63         psllq   xmm15,1
64         pxor    xmm1,xmm9
65         pxor    xmm15,xmm10
66         movdqa  xmm7,XMMWORD[32+r11]
; shift 2 (mask in xmm8): pairs (xmm4,xmm6) and (xmm3,xmm5)
67         movdqa  xmm9,xmm4
68         psrlq   xmm4,2
69         movdqa  xmm10,xmm3
70         psrlq   xmm3,2
71         pxor    xmm4,xmm6
72         pxor    xmm3,xmm5
73         pand    xmm4,xmm8
74         pand    xmm3,xmm8
75         pxor    xmm6,xmm4
76         psllq   xmm4,2
77         pxor    xmm5,xmm3
78         psllq   xmm3,2
79         pxor    xmm4,xmm9
80         pxor    xmm3,xmm10
; shift 2: pairs (xmm0,xmm2) and (xmm15,xmm1)
81         movdqa  xmm9,xmm0
82         psrlq   xmm0,2
83         movdqa  xmm10,xmm15
84         psrlq   xmm15,2
85         pxor    xmm0,xmm2
86         pxor    xmm15,xmm1
87         pand    xmm0,xmm8
88         pand    xmm15,xmm8
89         pxor    xmm2,xmm0
90         psllq   xmm0,2
91         pxor    xmm1,xmm15
92         psllq   xmm15,2
93         pxor    xmm0,xmm9
94         pxor    xmm15,xmm10
; shift 4 (mask in xmm7, reloaded above): pairs (xmm2,xmm6) and (xmm1,xmm5)
95         movdqa  xmm9,xmm2
96         psrlq   xmm2,4
97         movdqa  xmm10,xmm1
98         psrlq   xmm1,4
99         pxor    xmm2,xmm6
100         pxor    xmm1,xmm5
101         pand    xmm2,xmm7
102         pand    xmm1,xmm7
103         pxor    xmm6,xmm2
104         psllq   xmm2,4
105         pxor    xmm5,xmm1
106         psllq   xmm1,4
107         pxor    xmm2,xmm9
108         pxor    xmm1,xmm10
; shift 4: pairs (xmm0,xmm4) and (xmm15,xmm3)
109         movdqa  xmm9,xmm0
110         psrlq   xmm0,4
111         movdqa  xmm10,xmm15
112         psrlq   xmm15,4
113         pxor    xmm0,xmm4
114         pxor    xmm15,xmm3
115         pand    xmm0,xmm7
116         pand    xmm15,xmm7
117         pxor    xmm4,xmm0
118         psllq   xmm0,4
119         pxor    xmm3,xmm15
120         psllq   xmm15,4
121         pxor    xmm0,xmm9
122         pxor    xmm15,xmm10
123         dec     r10d                     ; one count consumed before the first pass through $L$enc_sbox
124         jmp     NEAR $L$enc_sbox         ; skip the key XOR / shuffle at $L$enc_loop on the first pass
; $L$enc_loop -- top of the encryption loop (re-entered from the loop tail).
; XORs the eight state registers with the next 128 bytes at rax (presumably
; one bit-sliced round key -- confirm against _bsaes_key_convert's output
; layout), byte-shuffles each register by the mask the loop tail left in
; xmm7, then advances rax by 128.  Falls through into $L$enc_sbox.
125 ALIGN   16
126 $L$enc_loop:
127         pxor    xmm15,XMMWORD[rax]
128         pxor    xmm0,XMMWORD[16+rax]
129         pxor    xmm1,XMMWORD[32+rax]
130         pxor    xmm2,XMMWORD[48+rax]
131 DB      102,68,15,56,0,255       ; pshufb xmm15,xmm7
132 DB      102,15,56,0,199          ; pshufb xmm0,xmm7
133         pxor    xmm3,XMMWORD[64+rax]
134         pxor    xmm4,XMMWORD[80+rax]
135 DB      102,15,56,0,207          ; pshufb xmm1,xmm7
136 DB      102,15,56,0,215          ; pshufb xmm2,xmm7
137         pxor    xmm5,XMMWORD[96+rax]
138         pxor    xmm6,XMMWORD[112+rax]
139 DB      102,15,56,0,223          ; pshufb xmm3,xmm7
140 DB      102,15,56,0,231          ; pshufb xmm4,xmm7
141 DB      102,15,56,0,239          ; pshufb xmm5,xmm7
142 DB      102,15,56,0,247          ; pshufb xmm6,xmm7
143         lea     rax,[128+rax]
; $L$enc_sbox -- body of the encryption loop.
;
; 1. A fixed circuit of pxor/pand/por over the eight state registers
;    (xmm15, xmm0-xmm6) using xmm7-xmm14 as temporaries; presumably the
;    bit-sliced AES S-box, as the label name suggests.
; 2. r10d is decremented; when it goes negative control leaves for $L$enc_done.
; 3. Otherwise each register is combined with dword-rotated copies of itself
;    and of its neighbours (pshufd 0x93 / 0x4E); presumably the MixColumns
;    step -- confirm against the generating perlasm source.
; 4. The shuffle mask for the next pass is loaded into xmm7: $L$BS0+48
;    normally, $L$BS0+64 when the counter has just reached zero.
; Expects r11 = $L$BS0.  Instruction order is significant throughout.
144 $L$enc_sbox:
145         pxor    xmm4,xmm5
146         pxor    xmm1,xmm0
147         pxor    xmm2,xmm15
148         pxor    xmm5,xmm1
149         pxor    xmm4,xmm15
150
151         pxor    xmm5,xmm2
152         pxor    xmm2,xmm6
153         pxor    xmm6,xmm4
154         pxor    xmm2,xmm3
155         pxor    xmm3,xmm4
156         pxor    xmm2,xmm0
157
158         pxor    xmm1,xmm6
159         pxor    xmm0,xmm4
160         movdqa  xmm10,xmm6
161         movdqa  xmm9,xmm0
162         movdqa  xmm8,xmm4
163         movdqa  xmm12,xmm1
164         movdqa  xmm11,xmm5
165
166         pxor    xmm10,xmm3
167         pxor    xmm9,xmm1
168         pxor    xmm8,xmm2
169         movdqa  xmm13,xmm10
170         pxor    xmm12,xmm3
171         movdqa  xmm7,xmm9
172         pxor    xmm11,xmm15
173         movdqa  xmm14,xmm10
174
175         por     xmm9,xmm8
176         por     xmm10,xmm11
177         pxor    xmm14,xmm7
178         pand    xmm13,xmm11
179         pxor    xmm11,xmm8
180         pand    xmm7,xmm8
181         pand    xmm14,xmm11
182         movdqa  xmm11,xmm2
183         pxor    xmm11,xmm15
184         pand    xmm12,xmm11
185         pxor    xmm10,xmm12
186         pxor    xmm9,xmm12
187         movdqa  xmm12,xmm6
188         movdqa  xmm11,xmm4
189         pxor    xmm12,xmm0
190         pxor    xmm11,xmm5
191         movdqa  xmm8,xmm12
192         pand    xmm12,xmm11
193         por     xmm8,xmm11
194         pxor    xmm7,xmm12
195         pxor    xmm10,xmm14
196         pxor    xmm9,xmm13
197         pxor    xmm8,xmm14
198         movdqa  xmm11,xmm1
199         pxor    xmm7,xmm13
200         movdqa  xmm12,xmm3
201         pxor    xmm8,xmm13
202         movdqa  xmm13,xmm0
203         pand    xmm11,xmm2
204         movdqa  xmm14,xmm6
205         pand    xmm12,xmm15
206         pand    xmm13,xmm4
207         por     xmm14,xmm5
208         pxor    xmm10,xmm11
209         pxor    xmm9,xmm12
210         pxor    xmm8,xmm13
211         pxor    xmm7,xmm14
212
213
214
215
216
217         movdqa  xmm11,xmm10
218         pand    xmm10,xmm8
219         pxor    xmm11,xmm9
220
221         movdqa  xmm13,xmm7
222         movdqa  xmm14,xmm11
223         pxor    xmm13,xmm10
224         pand    xmm14,xmm13
225
226         movdqa  xmm12,xmm8
227         pxor    xmm14,xmm9
228         pxor    xmm12,xmm7
229
230         pxor    xmm10,xmm9
231
232         pand    xmm12,xmm10
233
234         movdqa  xmm9,xmm13
235         pxor    xmm12,xmm7
236
237         pxor    xmm9,xmm12
238         pxor    xmm8,xmm12
239
240         pand    xmm9,xmm7
241
242         pxor    xmm13,xmm9
243         pxor    xmm8,xmm9
244
245         pand    xmm13,xmm14
246
247         pxor    xmm13,xmm11
248         movdqa  xmm11,xmm5
249         movdqa  xmm7,xmm4
250         movdqa  xmm9,xmm14
251         pxor    xmm9,xmm13
252         pand    xmm9,xmm5
253         pxor    xmm5,xmm4
254         pand    xmm4,xmm14
255         pand    xmm5,xmm13
256         pxor    xmm5,xmm4
257         pxor    xmm4,xmm9
258         pxor    xmm11,xmm15
259         pxor    xmm7,xmm2
260         pxor    xmm14,xmm12
261         pxor    xmm13,xmm8
262         movdqa  xmm10,xmm14
263         movdqa  xmm9,xmm12
264         pxor    xmm10,xmm13
265         pxor    xmm9,xmm8
266         pand    xmm10,xmm11
267         pand    xmm9,xmm15
268         pxor    xmm11,xmm7
269         pxor    xmm15,xmm2
270         pand    xmm7,xmm14
271         pand    xmm2,xmm12
272         pand    xmm11,xmm13
273         pand    xmm15,xmm8
274         pxor    xmm7,xmm11
275         pxor    xmm15,xmm2
276         pxor    xmm11,xmm10
277         pxor    xmm2,xmm9
278         pxor    xmm5,xmm11
279         pxor    xmm15,xmm11
280         pxor    xmm4,xmm7
281         pxor    xmm2,xmm7
282
283         movdqa  xmm11,xmm6
284         movdqa  xmm7,xmm0
285         pxor    xmm11,xmm3
286         pxor    xmm7,xmm1
287         movdqa  xmm10,xmm14
288         movdqa  xmm9,xmm12
289         pxor    xmm10,xmm13
290         pxor    xmm9,xmm8
291         pand    xmm10,xmm11
292         pand    xmm9,xmm3
293         pxor    xmm11,xmm7
294         pxor    xmm3,xmm1
295         pand    xmm7,xmm14
296         pand    xmm1,xmm12
297         pand    xmm11,xmm13
298         pand    xmm3,xmm8
299         pxor    xmm7,xmm11
300         pxor    xmm3,xmm1
301         pxor    xmm11,xmm10
302         pxor    xmm1,xmm9
303         pxor    xmm14,xmm12
304         pxor    xmm13,xmm8
305         movdqa  xmm10,xmm14
306         pxor    xmm10,xmm13
307         pand    xmm10,xmm6
308         pxor    xmm6,xmm0
309         pand    xmm0,xmm14
310         pand    xmm6,xmm13
311         pxor    xmm6,xmm0
312         pxor    xmm0,xmm10
313         pxor    xmm6,xmm11
314         pxor    xmm3,xmm11
315         pxor    xmm0,xmm7
316         pxor    xmm1,xmm7
317         pxor    xmm6,xmm15
318         pxor    xmm0,xmm5
319         pxor    xmm3,xmm6
320         pxor    xmm5,xmm15
321         pxor    xmm15,xmm0
322
323         pxor    xmm0,xmm4
324         pxor    xmm4,xmm1
325         pxor    xmm1,xmm2
326         pxor    xmm2,xmm4
327         pxor    xmm3,xmm4
328
329         pxor    xmm5,xmm2
330         dec     r10d                     ; sets the flags tested by jl here and by jnz at the loop tail
331         jl      NEAR $L$enc_done         ; counter went negative: skip the mixing step and finish
; Mixing step: combine each register with its pshufd-rotated copies.
332         pshufd  xmm7,xmm15,0x93
333         pshufd  xmm8,xmm0,0x93
334         pxor    xmm15,xmm7
335         pshufd  xmm9,xmm3,0x93
336         pxor    xmm0,xmm8
337         pshufd  xmm10,xmm5,0x93
338         pxor    xmm3,xmm9
339         pshufd  xmm11,xmm2,0x93
340         pxor    xmm5,xmm10
341         pshufd  xmm12,xmm6,0x93
342         pxor    xmm2,xmm11
343         pshufd  xmm13,xmm1,0x93
344         pxor    xmm6,xmm12
345         pshufd  xmm14,xmm4,0x93
346         pxor    xmm1,xmm13
347         pxor    xmm4,xmm14
348
349         pxor    xmm8,xmm15
350         pxor    xmm7,xmm4
351         pxor    xmm8,xmm4
352         pshufd  xmm15,xmm15,0x4E
353         pxor    xmm9,xmm0
354         pshufd  xmm0,xmm0,0x4E
355         pxor    xmm12,xmm2
356         pxor    xmm15,xmm7
357         pxor    xmm13,xmm6
358         pxor    xmm0,xmm8
359         pxor    xmm11,xmm5
360         pshufd  xmm7,xmm2,0x4E
361         pxor    xmm14,xmm1
362         pshufd  xmm8,xmm6,0x4E
363         pxor    xmm10,xmm3
364         pshufd  xmm2,xmm5,0x4E
365         pxor    xmm10,xmm4
366         pshufd  xmm6,xmm4,0x4E
367         pxor    xmm11,xmm4
368         pshufd  xmm5,xmm1,0x4E
369         pxor    xmm7,xmm11
370         pshufd  xmm1,xmm3,0x4E
371         pxor    xmm8,xmm12
372         pxor    xmm2,xmm10
373         pxor    xmm6,xmm14
374         pxor    xmm5,xmm13
375         movdqa  xmm3,xmm7
376         pxor    xmm1,xmm9
377         movdqa  xmm4,xmm8
378         movdqa  xmm7,XMMWORD[48+r11]     ; shuffle mask for an ordinary next pass
379         jnz     NEAR $L$enc_loop         ; ZF is still from dec r10d: the SSE instructions in between leave flags alone
380         movdqa  xmm7,XMMWORD[64+r11]     ; counter just hit zero: next pass uses the mask at $L$BS0+64 instead
381         jmp     NEAR $L$enc_loop
; $L$enc_done -- exit path of the encryption routine.
;
; Runs the mask-and-shift swap network again (same step shape as at
; _bsaes_encrypt8_bitslice: t = ((a >> n) ^ b) & m; b ^= t; a ^= t << n, with
; masks from $L$BS0+0/+16/+32), presumably converting the state back out of
; bit-sliced form.  It then XORs all eight state registers with the 16 bytes
; at rax (presumably the last round key) and returns.
; Results are left in xmm15, xmm0, xmm1-xmm6; the block order of these
; registers is determined by callers and is not restated here.
382 ALIGN   16
383 $L$enc_done:
384         movdqa  xmm7,XMMWORD[r11]
385         movdqa  xmm8,XMMWORD[16+r11]
; shift 1: pairs (xmm1,xmm4) and (xmm2,xmm6)
386         movdqa  xmm9,xmm1
387         psrlq   xmm1,1
388         movdqa  xmm10,xmm2
389         psrlq   xmm2,1
390         pxor    xmm1,xmm4
391         pxor    xmm2,xmm6
392         pand    xmm1,xmm7
393         pand    xmm2,xmm7
394         pxor    xmm4,xmm1
395         psllq   xmm1,1
396         pxor    xmm6,xmm2
397         psllq   xmm2,1
398         pxor    xmm1,xmm9
399         pxor    xmm2,xmm10
; shift 1: pairs (xmm3,xmm5) and (xmm15,xmm0)
400         movdqa  xmm9,xmm3
401         psrlq   xmm3,1
402         movdqa  xmm10,xmm15
403         psrlq   xmm15,1
404         pxor    xmm3,xmm5
405         pxor    xmm15,xmm0
406         pand    xmm3,xmm7
407         pand    xmm15,xmm7
408         pxor    xmm5,xmm3
409         psllq   xmm3,1
410         pxor    xmm0,xmm15
411         psllq   xmm15,1
412         pxor    xmm3,xmm9
413         pxor    xmm15,xmm10
414         movdqa  xmm7,XMMWORD[32+r11]
; shift 2 (mask in xmm8): pairs (xmm6,xmm4) and (xmm2,xmm1)
415         movdqa  xmm9,xmm6
416         psrlq   xmm6,2
417         movdqa  xmm10,xmm2
418         psrlq   xmm2,2
419         pxor    xmm6,xmm4
420         pxor    xmm2,xmm1
421         pand    xmm6,xmm8
422         pand    xmm2,xmm8
423         pxor    xmm4,xmm6
424         psllq   xmm6,2
425         pxor    xmm1,xmm2
426         psllq   xmm2,2
427         pxor    xmm6,xmm9
428         pxor    xmm2,xmm10
; shift 2: pairs (xmm0,xmm5) and (xmm15,xmm3)
429         movdqa  xmm9,xmm0
430         psrlq   xmm0,2
431         movdqa  xmm10,xmm15
432         psrlq   xmm15,2
433         pxor    xmm0,xmm5
434         pxor    xmm15,xmm3
435         pand    xmm0,xmm8
436         pand    xmm15,xmm8
437         pxor    xmm5,xmm0
438         psllq   xmm0,2
439         pxor    xmm3,xmm15
440         psllq   xmm15,2
441         pxor    xmm0,xmm9
442         pxor    xmm15,xmm10
; shift 4 (mask in xmm7, reloaded above): pairs (xmm5,xmm4) and (xmm3,xmm1)
443         movdqa  xmm9,xmm5
444         psrlq   xmm5,4
445         movdqa  xmm10,xmm3
446         psrlq   xmm3,4
447         pxor    xmm5,xmm4
448         pxor    xmm3,xmm1
449         pand    xmm5,xmm7
450         pand    xmm3,xmm7
451         pxor    xmm4,xmm5
452         psllq   xmm5,4
453         pxor    xmm1,xmm3
454         psllq   xmm3,4
455         pxor    xmm5,xmm9
456         pxor    xmm3,xmm10
; shift 4: pairs (xmm0,xmm6) and (xmm15,xmm2)
457         movdqa  xmm9,xmm0
458         psrlq   xmm0,4
459         movdqa  xmm10,xmm15
460         psrlq   xmm15,4
461         pxor    xmm0,xmm6
462         pxor    xmm15,xmm2
463         pand    xmm0,xmm7
464         pand    xmm15,xmm7
465         pxor    xmm6,xmm0
466         psllq   xmm0,4
467         pxor    xmm2,xmm15
468         psllq   xmm15,4
469         pxor    xmm0,xmm9
470         pxor    xmm15,xmm10
; Final XOR of every state register with the 16 bytes at rax.
471         movdqa  xmm7,XMMWORD[rax]
472         pxor    xmm3,xmm7
473         pxor    xmm5,xmm7
474         pxor    xmm2,xmm7
475         pxor    xmm6,xmm7
476         pxor    xmm1,xmm7
477         pxor    xmm4,xmm7
478         pxor    xmm15,xmm7
479         pxor    xmm0,xmm7
480         DB      0F3h,0C3h               ;repret
481
482
483
; _bsaes_decrypt8 -- entry of the eight-register decryption routine.
;
; In:   rax            pointer to 16-byte-aligned data (read with movdqa);
;                      presumably the converted key schedule -- confirm with callers
;       xmm15,xmm0-6   the eight 128-bit state registers
;       r10d           loop counter (decremented once at the end of this block)
; Here: r11 = address of $L$BS0 (constant table defined outside this view),
;       xmm8 = first 16 bytes at rax, rax advanced by 16,
;       xmm7 = shuffle mask loaded from $L$BS0-48.
;       Every state register is XORed with xmm8 and byte-shuffled by xmm7
;       (the DB sequences are hand-encoded pshufb instructions).
; The state then goes through a mask-and-shift swap network whose steps all
; have the shape  t = ((a >> n) ^ b) & m ;  b ^= t ;  a ^= (t << n)
; with n = 1, 2, 4 and masks from $L$BS0+0, +16, +32; presumably the
; conversion into bit-sliced form.  Control then jumps to $L$dec_sbox.
484 ALIGN   64
485 _bsaes_decrypt8:
486         lea     r11,[$L$BS0]
487
488         movdqa  xmm8,XMMWORD[rax]
489         lea     rax,[16+rax]
490         movdqa  xmm7,XMMWORD[((-48))+r11]
491         pxor    xmm15,xmm8
492         pxor    xmm0,xmm8
493         pxor    xmm1,xmm8
494         pxor    xmm2,xmm8
495 DB      102,68,15,56,0,255       ; pshufb xmm15,xmm7
496 DB      102,15,56,0,199          ; pshufb xmm0,xmm7
497         pxor    xmm3,xmm8
498         pxor    xmm4,xmm8
499 DB      102,15,56,0,207          ; pshufb xmm1,xmm7
500 DB      102,15,56,0,215          ; pshufb xmm2,xmm7
501         pxor    xmm5,xmm8
502         pxor    xmm6,xmm8
503 DB      102,15,56,0,223          ; pshufb xmm3,xmm7
504 DB      102,15,56,0,231          ; pshufb xmm4,xmm7
505 DB      102,15,56,0,239          ; pshufb xmm5,xmm7
506 DB      102,15,56,0,247          ; pshufb xmm6,xmm7
507         movdqa  xmm7,XMMWORD[r11]
508         movdqa  xmm8,XMMWORD[16+r11]
; shift 1: pairs (xmm5,xmm6) and (xmm3,xmm4)
509         movdqa  xmm9,xmm5
510         psrlq   xmm5,1
511         movdqa  xmm10,xmm3
512         psrlq   xmm3,1
513         pxor    xmm5,xmm6
514         pxor    xmm3,xmm4
515         pand    xmm5,xmm7
516         pand    xmm3,xmm7
517         pxor    xmm6,xmm5
518         psllq   xmm5,1
519         pxor    xmm4,xmm3
520         psllq   xmm3,1
521         pxor    xmm5,xmm9
522         pxor    xmm3,xmm10
; shift 1: pairs (xmm1,xmm2) and (xmm15,xmm0)
523         movdqa  xmm9,xmm1
524         psrlq   xmm1,1
525         movdqa  xmm10,xmm15
526         psrlq   xmm15,1
527         pxor    xmm1,xmm2
528         pxor    xmm15,xmm0
529         pand    xmm1,xmm7
530         pand    xmm15,xmm7
531         pxor    xmm2,xmm1
532         psllq   xmm1,1
533         pxor    xmm0,xmm15
534         psllq   xmm15,1
535         pxor    xmm1,xmm9
536         pxor    xmm15,xmm10
537         movdqa  xmm7,XMMWORD[32+r11]
; shift 2 (mask in xmm8): pairs (xmm4,xmm6) and (xmm3,xmm5)
538         movdqa  xmm9,xmm4
539         psrlq   xmm4,2
540         movdqa  xmm10,xmm3
541         psrlq   xmm3,2
542         pxor    xmm4,xmm6
543         pxor    xmm3,xmm5
544         pand    xmm4,xmm8
545         pand    xmm3,xmm8
546         pxor    xmm6,xmm4
547         psllq   xmm4,2
548         pxor    xmm5,xmm3
549         psllq   xmm3,2
550         pxor    xmm4,xmm9
551         pxor    xmm3,xmm10
; shift 2: pairs (xmm0,xmm2) and (xmm15,xmm1)
552         movdqa  xmm9,xmm0
553         psrlq   xmm0,2
554         movdqa  xmm10,xmm15
555         psrlq   xmm15,2
556         pxor    xmm0,xmm2
557         pxor    xmm15,xmm1
558         pand    xmm0,xmm8
559         pand    xmm15,xmm8
560         pxor    xmm2,xmm0
561         psllq   xmm0,2
562         pxor    xmm1,xmm15
563         psllq   xmm15,2
564         pxor    xmm0,xmm9
565         pxor    xmm15,xmm10
; shift 4 (mask in xmm7, reloaded above): pairs (xmm2,xmm6) and (xmm1,xmm5)
566         movdqa  xmm9,xmm2
567         psrlq   xmm2,4
568         movdqa  xmm10,xmm1
569         psrlq   xmm1,4
570         pxor    xmm2,xmm6
571         pxor    xmm1,xmm5
572         pand    xmm2,xmm7
573         pand    xmm1,xmm7
574         pxor    xmm6,xmm2
575         psllq   xmm2,4
576         pxor    xmm5,xmm1
577         psllq   xmm1,4
578         pxor    xmm2,xmm9
579         pxor    xmm1,xmm10
; shift 4: pairs (xmm0,xmm4) and (xmm15,xmm3)
580         movdqa  xmm9,xmm0
581         psrlq   xmm0,4
582         movdqa  xmm10,xmm15
583         psrlq   xmm15,4
584         pxor    xmm0,xmm4
585         pxor    xmm15,xmm3
586         pand    xmm0,xmm7
587         pand    xmm15,xmm7
588         pxor    xmm4,xmm0
589         psllq   xmm0,4
590         pxor    xmm3,xmm15
591         psllq   xmm15,4
592         pxor    xmm0,xmm9
593         pxor    xmm15,xmm10
594         dec     r10d                     ; one count consumed before the first pass through $L$dec_sbox
595         jmp     NEAR $L$dec_sbox         ; skip the key XOR / shuffle at $L$dec_loop on the first pass
; $L$dec_loop -- top of the decryption loop (re-entered from the loop tail).
; XORs the eight state registers with the next 128 bytes at rax (presumably
; one bit-sliced round key -- confirm against _bsaes_key_convert's output
; layout), byte-shuffles each register by the mask the loop tail left in
; xmm7, then advances rax by 128.  Falls through into $L$dec_sbox.
596 ALIGN   16
597 $L$dec_loop:
598         pxor    xmm15,XMMWORD[rax]
599         pxor    xmm0,XMMWORD[16+rax]
600         pxor    xmm1,XMMWORD[32+rax]
601         pxor    xmm2,XMMWORD[48+rax]
602 DB      102,68,15,56,0,255       ; pshufb xmm15,xmm7
603 DB      102,15,56,0,199          ; pshufb xmm0,xmm7
604         pxor    xmm3,XMMWORD[64+rax]
605         pxor    xmm4,XMMWORD[80+rax]
606 DB      102,15,56,0,207          ; pshufb xmm1,xmm7
607 DB      102,15,56,0,215          ; pshufb xmm2,xmm7
608         pxor    xmm5,XMMWORD[96+rax]
609         pxor    xmm6,XMMWORD[112+rax]
610 DB      102,15,56,0,223          ; pshufb xmm3,xmm7
611 DB      102,15,56,0,231          ; pshufb xmm4,xmm7
612 DB      102,15,56,0,239          ; pshufb xmm5,xmm7
613 DB      102,15,56,0,247          ; pshufb xmm6,xmm7
614         lea     rax,[128+rax]
; $L$dec_sbox -- body of the decryption loop.
;
; 1. A fixed circuit of pxor/pand/por over the eight state registers
;    (xmm15, xmm0-xmm6) using xmm7-xmm14 as temporaries; presumably the
;    bit-sliced inverse AES S-box, as the label name suggests.
; 2. r10d is decremented; when it goes negative control leaves for $L$dec_done.
; 3. Otherwise the registers are mixed in two stages, first with pshufd 0x4E
;    copies and then with pshufd 0x93 / 0x4E copies; presumably the
;    InvMixColumns step -- confirm against the generating perlasm source.
; 4. The shuffle mask for the next pass is loaded into xmm7: $L$BS0-16
;    normally, $L$BS0-32 when the counter has just reached zero.
; Expects r11 = $L$BS0.  Instruction order is significant throughout.
615 $L$dec_sbox:
616         pxor    xmm2,xmm3
617
618         pxor    xmm3,xmm6
619         pxor    xmm1,xmm6
620         pxor    xmm5,xmm3
621         pxor    xmm6,xmm5
622         pxor    xmm0,xmm6
623
624         pxor    xmm15,xmm0
625         pxor    xmm1,xmm4
626         pxor    xmm2,xmm15
627         pxor    xmm4,xmm15
628         pxor    xmm0,xmm2
629         movdqa  xmm10,xmm2
630         movdqa  xmm9,xmm6
631         movdqa  xmm8,xmm0
632         movdqa  xmm12,xmm3
633         movdqa  xmm11,xmm4
634
635         pxor    xmm10,xmm15
636         pxor    xmm9,xmm3
637         pxor    xmm8,xmm5
638         movdqa  xmm13,xmm10
639         pxor    xmm12,xmm15
640         movdqa  xmm7,xmm9
641         pxor    xmm11,xmm1
642         movdqa  xmm14,xmm10
643
644         por     xmm9,xmm8
645         por     xmm10,xmm11
646         pxor    xmm14,xmm7
647         pand    xmm13,xmm11
648         pxor    xmm11,xmm8
649         pand    xmm7,xmm8
650         pand    xmm14,xmm11
651         movdqa  xmm11,xmm5
652         pxor    xmm11,xmm1
653         pand    xmm12,xmm11
654         pxor    xmm10,xmm12
655         pxor    xmm9,xmm12
656         movdqa  xmm12,xmm2
657         movdqa  xmm11,xmm0
658         pxor    xmm12,xmm6
659         pxor    xmm11,xmm4
660         movdqa  xmm8,xmm12
661         pand    xmm12,xmm11
662         por     xmm8,xmm11
663         pxor    xmm7,xmm12
664         pxor    xmm10,xmm14
665         pxor    xmm9,xmm13
666         pxor    xmm8,xmm14
667         movdqa  xmm11,xmm3
668         pxor    xmm7,xmm13
669         movdqa  xmm12,xmm15
670         pxor    xmm8,xmm13
671         movdqa  xmm13,xmm6
672         pand    xmm11,xmm5
673         movdqa  xmm14,xmm2
674         pand    xmm12,xmm1
675         pand    xmm13,xmm0
676         por     xmm14,xmm4
677         pxor    xmm10,xmm11
678         pxor    xmm9,xmm12
679         pxor    xmm8,xmm13
680         pxor    xmm7,xmm14
681
682
683
684
685
686         movdqa  xmm11,xmm10
687         pand    xmm10,xmm8
688         pxor    xmm11,xmm9
689
690         movdqa  xmm13,xmm7
691         movdqa  xmm14,xmm11
692         pxor    xmm13,xmm10
693         pand    xmm14,xmm13
694
695         movdqa  xmm12,xmm8
696         pxor    xmm14,xmm9
697         pxor    xmm12,xmm7
698
699         pxor    xmm10,xmm9
700
701         pand    xmm12,xmm10
702
703         movdqa  xmm9,xmm13
704         pxor    xmm12,xmm7
705
706         pxor    xmm9,xmm12
707         pxor    xmm8,xmm12
708
709         pand    xmm9,xmm7
710
711         pxor    xmm13,xmm9
712         pxor    xmm8,xmm9
713
714         pand    xmm13,xmm14
715
716         pxor    xmm13,xmm11
717         movdqa  xmm11,xmm4
718         movdqa  xmm7,xmm0
719         movdqa  xmm9,xmm14
720         pxor    xmm9,xmm13
721         pand    xmm9,xmm4
722         pxor    xmm4,xmm0
723         pand    xmm0,xmm14
724         pand    xmm4,xmm13
725         pxor    xmm4,xmm0
726         pxor    xmm0,xmm9
727         pxor    xmm11,xmm1
728         pxor    xmm7,xmm5
729         pxor    xmm14,xmm12
730         pxor    xmm13,xmm8
731         movdqa  xmm10,xmm14
732         movdqa  xmm9,xmm12
733         pxor    xmm10,xmm13
734         pxor    xmm9,xmm8
735         pand    xmm10,xmm11
736         pand    xmm9,xmm1
737         pxor    xmm11,xmm7
738         pxor    xmm1,xmm5
739         pand    xmm7,xmm14
740         pand    xmm5,xmm12
741         pand    xmm11,xmm13
742         pand    xmm1,xmm8
743         pxor    xmm7,xmm11
744         pxor    xmm1,xmm5
745         pxor    xmm11,xmm10
746         pxor    xmm5,xmm9
747         pxor    xmm4,xmm11
748         pxor    xmm1,xmm11
749         pxor    xmm0,xmm7
750         pxor    xmm5,xmm7
751
752         movdqa  xmm11,xmm2
753         movdqa  xmm7,xmm6
754         pxor    xmm11,xmm15
755         pxor    xmm7,xmm3
756         movdqa  xmm10,xmm14
757         movdqa  xmm9,xmm12
758         pxor    xmm10,xmm13
759         pxor    xmm9,xmm8
760         pand    xmm10,xmm11
761         pand    xmm9,xmm15
762         pxor    xmm11,xmm7
763         pxor    xmm15,xmm3
764         pand    xmm7,xmm14
765         pand    xmm3,xmm12
766         pand    xmm11,xmm13
767         pand    xmm15,xmm8
768         pxor    xmm7,xmm11
769         pxor    xmm15,xmm3
770         pxor    xmm11,xmm10
771         pxor    xmm3,xmm9
772         pxor    xmm14,xmm12
773         pxor    xmm13,xmm8
774         movdqa  xmm10,xmm14
775         pxor    xmm10,xmm13
776         pand    xmm10,xmm2
777         pxor    xmm2,xmm6
778         pand    xmm6,xmm14
779         pand    xmm2,xmm13
780         pxor    xmm2,xmm6
781         pxor    xmm6,xmm10
782         pxor    xmm2,xmm11
783         pxor    xmm15,xmm11
784         pxor    xmm6,xmm7
785         pxor    xmm3,xmm7
786         pxor    xmm0,xmm6
787         pxor    xmm5,xmm4
788
789         pxor    xmm3,xmm0
790         pxor    xmm1,xmm6
791         pxor    xmm4,xmm6
792         pxor    xmm3,xmm1
793         pxor    xmm6,xmm15
794         pxor    xmm3,xmm4
795         pxor    xmm2,xmm5
796         pxor    xmm5,xmm0
797         pxor    xmm2,xmm3
798
799         pxor    xmm3,xmm15
800         pxor    xmm6,xmm2
801         dec     r10d                     ; sets the flags tested by jl here and by jnz at the loop tail
802         jl      NEAR $L$dec_done         ; counter went negative: skip the mixing step and finish
803
; Mixing, stage 1: combine registers with their pshufd 0x4E copies.
804         pshufd  xmm7,xmm15,0x4E
805         pshufd  xmm13,xmm2,0x4E
806         pxor    xmm7,xmm15
807         pshufd  xmm14,xmm4,0x4E
808         pxor    xmm13,xmm2
809         pshufd  xmm8,xmm0,0x4E
810         pxor    xmm14,xmm4
811         pshufd  xmm9,xmm5,0x4E
812         pxor    xmm8,xmm0
813         pshufd  xmm10,xmm3,0x4E
814         pxor    xmm9,xmm5
815         pxor    xmm15,xmm13
816         pxor    xmm0,xmm13
817         pshufd  xmm11,xmm1,0x4E
818         pxor    xmm10,xmm3
819         pxor    xmm5,xmm7
820         pxor    xmm3,xmm8
821         pshufd  xmm12,xmm6,0x4E
822         pxor    xmm11,xmm1
823         pxor    xmm0,xmm14
824         pxor    xmm1,xmm9
825         pxor    xmm12,xmm6
826
827         pxor    xmm5,xmm14
828         pxor    xmm3,xmm13
829         pxor    xmm1,xmm13
830         pxor    xmm6,xmm10
831         pxor    xmm2,xmm11
832         pxor    xmm1,xmm14
833         pxor    xmm6,xmm14
834         pxor    xmm4,xmm12
; Mixing, stage 2: combine registers with their pshufd 0x93 and 0x4E copies.
835         pshufd  xmm7,xmm15,0x93
836         pshufd  xmm8,xmm0,0x93
837         pxor    xmm15,xmm7
838         pshufd  xmm9,xmm5,0x93
839         pxor    xmm0,xmm8
840         pshufd  xmm10,xmm3,0x93
841         pxor    xmm5,xmm9
842         pshufd  xmm11,xmm1,0x93
843         pxor    xmm3,xmm10
844         pshufd  xmm12,xmm6,0x93
845         pxor    xmm1,xmm11
846         pshufd  xmm13,xmm2,0x93
847         pxor    xmm6,xmm12
848         pshufd  xmm14,xmm4,0x93
849         pxor    xmm2,xmm13
850         pxor    xmm4,xmm14
851
852         pxor    xmm8,xmm15
853         pxor    xmm7,xmm4
854         pxor    xmm8,xmm4
855         pshufd  xmm15,xmm15,0x4E
856         pxor    xmm9,xmm0
857         pshufd  xmm0,xmm0,0x4E
858         pxor    xmm12,xmm1
859         pxor    xmm15,xmm7
860         pxor    xmm13,xmm6
861         pxor    xmm0,xmm8
862         pxor    xmm11,xmm3
863         pshufd  xmm7,xmm1,0x4E
864         pxor    xmm14,xmm2
865         pshufd  xmm8,xmm6,0x4E
866         pxor    xmm10,xmm5
867         pshufd  xmm1,xmm3,0x4E
868         pxor    xmm10,xmm4
869         pshufd  xmm6,xmm4,0x4E
870         pxor    xmm11,xmm4
871         pshufd  xmm3,xmm2,0x4E
872         pxor    xmm7,xmm11
873         pshufd  xmm2,xmm5,0x4E
874         pxor    xmm8,xmm12
875         pxor    xmm10,xmm1
876         pxor    xmm6,xmm14
877         pxor    xmm13,xmm3
878         movdqa  xmm3,xmm7
879         pxor    xmm2,xmm9
880         movdqa  xmm5,xmm13
881         movdqa  xmm4,xmm8
882         movdqa  xmm1,xmm2
883         movdqa  xmm2,xmm10
884         movdqa  xmm7,XMMWORD[((-16))+r11]    ; shuffle mask for an ordinary next pass
885         jnz     NEAR $L$dec_loop             ; ZF is still from dec r10d: the SSE instructions in between leave flags alone
886         movdqa  xmm7,XMMWORD[((-32))+r11]    ; counter just hit zero: next pass uses the mask at $L$BS0-32 instead
887         jmp     NEAR $L$dec_loop
; $L$dec_done -- exit path of the decryption routine.
;
; Runs the mask-and-shift swap network again (each step has the shape
; t = ((a >> n) ^ b) & m; b ^= t; a ^= t << n, with n = 1, 2, 4 and masks
; from $L$BS0+0/+16/+32), presumably converting the state back out of
; bit-sliced form.  It then XORs all eight state registers with the 16 bytes
; at rax (presumably the last round key) and returns.
; Results are left in xmm15, xmm0, xmm1-xmm6; the block order of these
; registers is determined by callers and is not restated here.
888 ALIGN   16
889 $L$dec_done:
890         movdqa  xmm7,XMMWORD[r11]
891         movdqa  xmm8,XMMWORD[16+r11]
; shift 1: pairs (xmm2,xmm4) and (xmm1,xmm6)
892         movdqa  xmm9,xmm2
893         psrlq   xmm2,1
894         movdqa  xmm10,xmm1
895         psrlq   xmm1,1
896         pxor    xmm2,xmm4
897         pxor    xmm1,xmm6
898         pand    xmm2,xmm7
899         pand    xmm1,xmm7
900         pxor    xmm4,xmm2
901         psllq   xmm2,1
902         pxor    xmm6,xmm1
903         psllq   xmm1,1
904         pxor    xmm2,xmm9
905         pxor    xmm1,xmm10
; shift 1: pairs (xmm5,xmm3) and (xmm15,xmm0)
906         movdqa  xmm9,xmm5
907         psrlq   xmm5,1
908         movdqa  xmm10,xmm15
909         psrlq   xmm15,1
910         pxor    xmm5,xmm3
911         pxor    xmm15,xmm0
912         pand    xmm5,xmm7
913         pand    xmm15,xmm7
914         pxor    xmm3,xmm5
915         psllq   xmm5,1
916         pxor    xmm0,xmm15
917         psllq   xmm15,1
918         pxor    xmm5,xmm9
919         pxor    xmm15,xmm10
920         movdqa  xmm7,XMMWORD[32+r11]
; shift 2 (mask in xmm8): pairs (xmm6,xmm4) and (xmm1,xmm2)
921         movdqa  xmm9,xmm6
922         psrlq   xmm6,2
923         movdqa  xmm10,xmm1
924         psrlq   xmm1,2
925         pxor    xmm6,xmm4
926         pxor    xmm1,xmm2
927         pand    xmm6,xmm8
928         pand    xmm1,xmm8
929         pxor    xmm4,xmm6
930         psllq   xmm6,2
931         pxor    xmm2,xmm1
932         psllq   xmm1,2
933         pxor    xmm6,xmm9
934         pxor    xmm1,xmm10
; shift 2: pairs (xmm0,xmm3) and (xmm15,xmm5)
935         movdqa  xmm9,xmm0
936         psrlq   xmm0,2
937         movdqa  xmm10,xmm15
938         psrlq   xmm15,2
939         pxor    xmm0,xmm3
940         pxor    xmm15,xmm5
941         pand    xmm0,xmm8
942         pand    xmm15,xmm8
943         pxor    xmm3,xmm0
944         psllq   xmm0,2
945         pxor    xmm5,xmm15
946         psllq   xmm15,2
947         pxor    xmm0,xmm9
948         pxor    xmm15,xmm10
; shift 4 (mask in xmm7, reloaded above): pairs (xmm3,xmm4) and (xmm5,xmm2)
949         movdqa  xmm9,xmm3
950         psrlq   xmm3,4
951         movdqa  xmm10,xmm5
952         psrlq   xmm5,4
953         pxor    xmm3,xmm4
954         pxor    xmm5,xmm2
955         pand    xmm3,xmm7
956         pand    xmm5,xmm7
957         pxor    xmm4,xmm3
958         psllq   xmm3,4
959         pxor    xmm2,xmm5
960         psllq   xmm5,4
961         pxor    xmm3,xmm9
962         pxor    xmm5,xmm10
; shift 4: pairs (xmm0,xmm6) and (xmm15,xmm1)
963         movdqa  xmm9,xmm0
964         psrlq   xmm0,4
965         movdqa  xmm10,xmm15
966         psrlq   xmm15,4
967         pxor    xmm0,xmm6
968         pxor    xmm15,xmm1
969         pand    xmm0,xmm7
970         pand    xmm15,xmm7
971         pxor    xmm6,xmm0
972         psllq   xmm0,4
973         pxor    xmm1,xmm15
974         psllq   xmm15,4
975         pxor    xmm0,xmm9
976         pxor    xmm15,xmm10
; Final XOR of every state register with the 16 bytes at rax.
977         movdqa  xmm7,XMMWORD[rax]
978         pxor    xmm5,xmm7
979         pxor    xmm3,xmm7
980         pxor    xmm1,xmm7
981         pxor    xmm6,xmm7
982         pxor    xmm2,xmm7
983         pxor    xmm4,xmm7
984         pxor    xmm15,xmm7
985         pxor    xmm0,xmm7
986         DB      0F3h,0C3h               ;repret
987
988
; _bsaes_key_convert -- expand 16-byte key blocks into 128-byte mask blocks.
;
; In:   rcx   source pointer, read 16 bytes at a time with unaligned loads
;       rax   destination pointer, written with aligned (movdqa) stores
;       r10d  count; it is decremented once before the loop, so the loop
;             body runs (r10d - 1) times
; Does: copies the first 16 source bytes to [rax] unchanged, then for each
;       following 16-byte block writes eight 16-byte vectors to rax.
;       Each vector is (block & mask) == mask, compared per byte, for one
;       of eight masks: the four at $L$masks+0..+48 and the same four
;       shifted left by 4 bits.  Vectors 0, 1, 5 and 6 are stored inverted.
;       (Presumably one vector per bit position of the key bytes; the mask
;       table contents are outside this view.)
; Out:  rax   advanced past everything written
;       rcx   points at the last 16-byte block loaded
;       xmm6  that last block, loaded but not converted
;       xmm7  the 16 bytes at $L$masks+80
; Clobbers xmm0-xmm15, r11, r10d and flags.
989 ALIGN   16
990 _bsaes_key_convert:
991         lea     r11,[$L$masks]
992         movdqu  xmm7,XMMWORD[rcx]
993         lea     rcx,[16+rcx]
994         movdqa  xmm0,XMMWORD[r11]
995         movdqa  xmm1,XMMWORD[16+r11]
996         movdqa  xmm2,XMMWORD[32+r11]
997         movdqa  xmm3,XMMWORD[48+r11]
998         movdqa  xmm4,XMMWORD[64+r11]     ; byte-shuffle control applied to every block in the loop
999         pcmpeqd xmm5,xmm5                ; xmm5 = all ones, used to invert selected vectors
1000
1001         movdqu  xmm6,XMMWORD[rcx]
1002         movdqa  XMMWORD[rax],xmm7        ; first block is stored as-is
1003         lea     rax,[16+rax]
1004         dec     r10d
1005         jmp     NEAR $L$key_loop
1006 ALIGN   16
1007 $L$key_loop:
1008 DB      102,15,56,0,244          ; pshufb xmm6,xmm4
1009
1010         movdqa  xmm8,xmm0
1011         movdqa  xmm9,xmm1
1012
1013         pand    xmm8,xmm6
1014         pand    xmm9,xmm6
1015         movdqa  xmm10,xmm2
1016         pcmpeqb xmm8,xmm0                ; byte = 0xFF where every mask bit is set in the block
1017         psllq   xmm0,4                   ; masks are shifted up by 4 for the second group of four vectors
1018         movdqa  xmm11,xmm3
1019         pcmpeqb xmm9,xmm1
1020         psllq   xmm1,4
1021
1022         pand    xmm10,xmm6
1023         pand    xmm11,xmm6
1024         movdqa  xmm12,xmm0
1025         pcmpeqb xmm10,xmm2
1026         psllq   xmm2,4
1027         movdqa  xmm13,xmm1
1028         pcmpeqb xmm11,xmm3
1029         psllq   xmm3,4
1030
1031         movdqa  xmm14,xmm2
1032         movdqa  xmm15,xmm3
1033         pxor    xmm8,xmm5                ; invert vector 0
1034         pxor    xmm9,xmm5                ; invert vector 1
1035
1036         pand    xmm12,xmm6
1037         pand    xmm13,xmm6
1038         movdqa  XMMWORD[rax],xmm8
1039         pcmpeqb xmm12,xmm0
1040         psrlq   xmm0,4                   ; restore the mask for the next iteration
1041         movdqa  XMMWORD[16+rax],xmm9
1042         pcmpeqb xmm13,xmm1
1043         psrlq   xmm1,4
1044         lea     rcx,[16+rcx]
1045
1046         pand    xmm14,xmm6
1047         pand    xmm15,xmm6
1048         movdqa  XMMWORD[32+rax],xmm10
1049         pcmpeqb xmm14,xmm2
1050         psrlq   xmm2,4
1051         movdqa  XMMWORD[48+rax],xmm11
1052         pcmpeqb xmm15,xmm3
1053         psrlq   xmm3,4
1054         movdqu  xmm6,XMMWORD[rcx]        ; fetch the next block; all uses of the current one are done
1055
1056         pxor    xmm13,xmm5               ; invert vector 5
1057         pxor    xmm14,xmm5               ; invert vector 6
1058         movdqa  XMMWORD[64+rax],xmm12
1059         movdqa  XMMWORD[80+rax],xmm13
1060         movdqa  XMMWORD[96+rax],xmm14
1061         movdqa  XMMWORD[112+rax],xmm15
1062         lea     rax,[128+rax]
1063         dec     r10d
1064         jnz     NEAR $L$key_loop
1065
1066         movdqa  xmm7,XMMWORD[80+r11]     ; returned to the caller in xmm7
1067
1068         DB      0F3h,0C3h               ;repret
1069
1070 EXTERN  asm_AES_cbc_encrypt
1071 global  bsaes_cbc_encrypt
1072
1073 ALIGN   16
; ----------------------------------------------------------------------
; bsaes_cbc_encrypt -- CBC entry point, Microsoft x64 register usage.
;
; In:   rcx      = input pointer            (kept in r12)
;       rdx      = output pointer           (kept in r13)
;       r8       = length in bytes          (kept in r14 as a block count)
;       r9       = key structure; the DWORD at offset 240 is used as the
;                  round count              (kept in r15)
;       [rsp+40] = pointer to the 16-byte IV, read on entry and written
;                  back on exit             (kept in rbx)
;       [rsp+48] = DWORD flag
;
; Only "flag == 0 and length >= 128" is handled here (the decrypt path,
; $L$cbc_dec_*).  Every other call is forwarded, arguments untouched, to
; asm_AES_cbc_encrypt.
;
; Frame (rbp = rsp after the prologue):
;       [rbp+32]        16-byte scratch: IV across _bsaes_decrypt8 calls,
;                       and the output buffer for asm_AES_decrypt
;       [rbp+64..223]   saved xmm6-xmm15
;       [rbp+232..279]  saved r15,r14,r13,r12,rbx,rbp (from the pushes)
;       below rbp       converted key schedule, rounds*128-96 bytes,
;                       zeroed again before returning
;
; _bsaes_decrypt8 takes its eight blocks in xmm15,xmm0..xmm6 and, as the
; store order below shows, returns them in xmm15,xmm0,xmm5,xmm3,xmm1,
; xmm6,xmm2,xmm4.
; ----------------------------------------------------------------------
1074 bsaes_cbc_encrypt:
1075         mov     r11d,DWORD[48+rsp]      ; sixth argument (above return address + 32-byte shadow area)
1076         cmp     r11d,0
1077         jne     NEAR asm_AES_cbc_encrypt        ; non-zero flag: not handled here
1078         cmp     r8,128
1079         jb      NEAR asm_AES_cbc_encrypt        ; fewer than 8 blocks: not handled here
1080
1081         mov     rax,rsp
1082 $L$cbc_dec_prologue:
1083         push    rbp
1084         push    rbx
1085         push    r12
1086         push    r13
1087         push    r14
1088         push    r15
1089         lea     rsp,[((-72))+rsp]
1090         mov     r10,QWORD[160+rsp]      ; = entry [rsp+40]: 6 pushes (48) + 72 + 40 -> IV pointer
1091         lea     rsp,[((-160))+rsp]
; xmm6-xmm15 are saved here and restored in the epilogue.
1092         movaps  XMMWORD[64+rsp],xmm6
1093         movaps  XMMWORD[80+rsp],xmm7
1094         movaps  XMMWORD[96+rsp],xmm8
1095         movaps  XMMWORD[112+rsp],xmm9
1096         movaps  XMMWORD[128+rsp],xmm10
1097         movaps  XMMWORD[144+rsp],xmm11
1098         movaps  XMMWORD[160+rsp],xmm12
1099         movaps  XMMWORD[176+rsp],xmm13
1100         movaps  XMMWORD[192+rsp],xmm14
1101         movaps  XMMWORD[208+rsp],xmm15
1102 $L$cbc_dec_body:
1103         mov     rbp,rsp                 ; rbp = frame base for the rest of the function
1104         mov     eax,DWORD[240+r9]       ; round count
1105         mov     r12,rcx                 ; r12 = input pointer
1106         mov     r13,rdx                 ; r13 = output pointer
1107         mov     r14,r8
1108         mov     r15,r9                  ; r15 = key structure
1109         mov     rbx,r10                 ; rbx = IV pointer
1110         shr     r14,4                   ; bytes -> 16-byte blocks
1111
; Reserve rounds*128-96 bytes below the frame for the converted schedule.
1112         mov     edx,eax                 ; edx = round count, reloaded into r10d before each call
1113         shl     rax,7
1114         sub     rax,96
1115         sub     rsp,rax
1116
1117         mov     rax,rsp                 ; rax = destination for the converted schedule
1118         mov     rcx,r15
1119         mov     r10d,edx
1120         call    _bsaes_key_convert
; Post-process the converted schedule: xmm7 is XORed with the first
; 16-byte slot and written back there, and xmm6 is stored where
; _bsaes_key_convert left rax.
; NOTE(review): presumably these fix up the first/last round keys for the
; decrypt direction -- confirm against _bsaes_key_convert.
1121         pxor    xmm7,XMMWORD[rsp]
1122         movdqa  XMMWORD[rax],xmm6
1123         movdqa  XMMWORD[rsp],xmm7
1124
1125         movdqu  xmm14,XMMWORD[rbx]      ; xmm14 = current IV
1126         sub     r14,8                   ; bias the count so a borrow below means "< 8 blocks left"
; ---- main loop: eight blocks per iteration ---------------------------
1127 $L$cbc_dec_loop:
1128         movdqu  xmm15,XMMWORD[r12]
1129         movdqu  xmm0,XMMWORD[16+r12]
1130         movdqu  xmm1,XMMWORD[32+r12]
1131         movdqu  xmm2,XMMWORD[48+r12]
1132         movdqu  xmm3,XMMWORD[64+r12]
1133         movdqu  xmm4,XMMWORD[80+r12]
1134         mov     rax,rsp                 ; rax = key schedule for _bsaes_decrypt8
1135         movdqu  xmm5,XMMWORD[96+r12]
1136         mov     r10d,edx                ; r10d = round count
1137         movdqu  xmm6,XMMWORD[112+r12]
1138         movdqa  XMMWORD[32+rbp],xmm14   ; park the IV; xmm14 is reloaded after the call
1139
1140         call    _bsaes_decrypt8
1141
; Un-chain: block 0 ^= IV, block i ^= ciphertext block i-1 (re-read from
; the input).  xmm14 picks up ciphertext block 7 as the next IV.
1142         pxor    xmm15,XMMWORD[32+rbp]
1143         movdqu  xmm7,XMMWORD[r12]
1144         movdqu  xmm8,XMMWORD[16+r12]
1145         pxor    xmm0,xmm7
1146         movdqu  xmm9,XMMWORD[32+r12]
1147         pxor    xmm5,xmm8
1148         movdqu  xmm10,XMMWORD[48+r12]
1149         pxor    xmm3,xmm9
1150         movdqu  xmm11,XMMWORD[64+r12]
1151         pxor    xmm1,xmm10
1152         movdqu  xmm12,XMMWORD[80+r12]
1153         pxor    xmm6,xmm11
1154         movdqu  xmm13,XMMWORD[96+r12]
1155         pxor    xmm2,xmm12
1156         movdqu  xmm14,XMMWORD[112+r12]  ; next IV
1157         pxor    xmm4,xmm13
1158         movdqu  XMMWORD[r13],xmm15
1159         lea     r12,[128+r12]
1160         movdqu  XMMWORD[16+r13],xmm0
1161         movdqu  XMMWORD[32+r13],xmm5
1162         movdqu  XMMWORD[48+r13],xmm3
1163         movdqu  XMMWORD[64+r13],xmm1
1164         movdqu  XMMWORD[80+r13],xmm6
1165         movdqu  XMMWORD[96+r13],xmm2
1166         movdqu  XMMWORD[112+r13],xmm4
1167         lea     r13,[128+r13]
1168         sub     r14,8
1169         jnc     NEAR $L$cbc_dec_loop    ; no borrow: at least 8 more blocks
1170
1171         add     r14,8                   ; undo the bias: r14 = 0..7 blocks left
1172         jz      NEAR $L$cbc_dec_done
1173
; ---- tail: 1..7 blocks ------------------------------------------------
; The loads are interleaved with the dispatch.  movdqu does not modify
; flags, so each je reuses the flags of the cmp two instructions earlier.
; Paths for fewer than eight blocks still call the 8-block routine and
; simply ignore the outputs they do not need.
1174         movdqu  xmm15,XMMWORD[r12]
1175         mov     rax,rsp
1176         mov     r10d,edx
1177         cmp     r14,2
1178         jb      NEAR $L$cbc_dec_one
1179         movdqu  xmm0,XMMWORD[16+r12]
1180         je      NEAR $L$cbc_dec_two
1181         movdqu  xmm1,XMMWORD[32+r12]
1182         cmp     r14,4
1183         jb      NEAR $L$cbc_dec_three
1184         movdqu  xmm2,XMMWORD[48+r12]
1185         je      NEAR $L$cbc_dec_four
1186         movdqu  xmm3,XMMWORD[64+r12]
1187         cmp     r14,6
1188         jb      NEAR $L$cbc_dec_five
1189         movdqu  xmm4,XMMWORD[80+r12]
1190         je      NEAR $L$cbc_dec_six
; seven blocks left
1191         movdqu  xmm5,XMMWORD[96+r12]
1192         movdqa  XMMWORD[32+rbp],xmm14
1193         call    _bsaes_decrypt8
1194         pxor    xmm15,XMMWORD[32+rbp]
1195         movdqu  xmm7,XMMWORD[r12]
1196         movdqu  xmm8,XMMWORD[16+r12]
1197         pxor    xmm0,xmm7
1198         movdqu  xmm9,XMMWORD[32+r12]
1199         pxor    xmm5,xmm8
1200         movdqu  xmm10,XMMWORD[48+r12]
1201         pxor    xmm3,xmm9
1202         movdqu  xmm11,XMMWORD[64+r12]
1203         pxor    xmm1,xmm10
1204         movdqu  xmm12,XMMWORD[80+r12]
1205         pxor    xmm6,xmm11
1206         movdqu  xmm14,XMMWORD[96+r12]   ; last ciphertext block -> IV to write back
1207         pxor    xmm2,xmm12
1208         movdqu  XMMWORD[r13],xmm15
1209         movdqu  XMMWORD[16+r13],xmm0
1210         movdqu  XMMWORD[32+r13],xmm5
1211         movdqu  XMMWORD[48+r13],xmm3
1212         movdqu  XMMWORD[64+r13],xmm1
1213         movdqu  XMMWORD[80+r13],xmm6
1214         movdqu  XMMWORD[96+r13],xmm2
1215         jmp     NEAR $L$cbc_dec_done
1216 ALIGN   16
; six blocks left
1217 $L$cbc_dec_six:
1218         movdqa  XMMWORD[32+rbp],xmm14
1219         call    _bsaes_decrypt8
1220         pxor    xmm15,XMMWORD[32+rbp]
1221         movdqu  xmm7,XMMWORD[r12]
1222         movdqu  xmm8,XMMWORD[16+r12]
1223         pxor    xmm0,xmm7
1224         movdqu  xmm9,XMMWORD[32+r12]
1225         pxor    xmm5,xmm8
1226         movdqu  xmm10,XMMWORD[48+r12]
1227         pxor    xmm3,xmm9
1228         movdqu  xmm11,XMMWORD[64+r12]
1229         pxor    xmm1,xmm10
1230         movdqu  xmm14,XMMWORD[80+r12]   ; last ciphertext block -> IV to write back
1231         pxor    xmm6,xmm11
1232         movdqu  XMMWORD[r13],xmm15
1233         movdqu  XMMWORD[16+r13],xmm0
1234         movdqu  XMMWORD[32+r13],xmm5
1235         movdqu  XMMWORD[48+r13],xmm3
1236         movdqu  XMMWORD[64+r13],xmm1
1237         movdqu  XMMWORD[80+r13],xmm6
1238         jmp     NEAR $L$cbc_dec_done
1239 ALIGN   16
; five blocks left
1240 $L$cbc_dec_five:
1241         movdqa  XMMWORD[32+rbp],xmm14
1242         call    _bsaes_decrypt8
1243         pxor    xmm15,XMMWORD[32+rbp]
1244         movdqu  xmm7,XMMWORD[r12]
1245         movdqu  xmm8,XMMWORD[16+r12]
1246         pxor    xmm0,xmm7
1247         movdqu  xmm9,XMMWORD[32+r12]
1248         pxor    xmm5,xmm8
1249         movdqu  xmm10,XMMWORD[48+r12]
1250         pxor    xmm3,xmm9
1251         movdqu  xmm14,XMMWORD[64+r12]   ; last ciphertext block -> IV to write back
1252         pxor    xmm1,xmm10
1253         movdqu  XMMWORD[r13],xmm15
1254         movdqu  XMMWORD[16+r13],xmm0
1255         movdqu  XMMWORD[32+r13],xmm5
1256         movdqu  XMMWORD[48+r13],xmm3
1257         movdqu  XMMWORD[64+r13],xmm1
1258         jmp     NEAR $L$cbc_dec_done
1259 ALIGN   16
; four blocks left
1260 $L$cbc_dec_four:
1261         movdqa  XMMWORD[32+rbp],xmm14
1262         call    _bsaes_decrypt8
1263         pxor    xmm15,XMMWORD[32+rbp]
1264         movdqu  xmm7,XMMWORD[r12]
1265         movdqu  xmm8,XMMWORD[16+r12]
1266         pxor    xmm0,xmm7
1267         movdqu  xmm9,XMMWORD[32+r12]
1268         pxor    xmm5,xmm8
1269         movdqu  xmm14,XMMWORD[48+r12]   ; last ciphertext block -> IV to write back
1270         pxor    xmm3,xmm9
1271         movdqu  XMMWORD[r13],xmm15
1272         movdqu  XMMWORD[16+r13],xmm0
1273         movdqu  XMMWORD[32+r13],xmm5
1274         movdqu  XMMWORD[48+r13],xmm3
1275         jmp     NEAR $L$cbc_dec_done
1276 ALIGN   16
; three blocks left
1277 $L$cbc_dec_three:
1278         movdqa  XMMWORD[32+rbp],xmm14
1279         call    _bsaes_decrypt8
1280         pxor    xmm15,XMMWORD[32+rbp]
1281         movdqu  xmm7,XMMWORD[r12]
1282         movdqu  xmm8,XMMWORD[16+r12]
1283         pxor    xmm0,xmm7
1284         movdqu  xmm14,XMMWORD[32+r12]   ; last ciphertext block -> IV to write back
1285         pxor    xmm5,xmm8
1286         movdqu  XMMWORD[r13],xmm15
1287         movdqu  XMMWORD[16+r13],xmm0
1288         movdqu  XMMWORD[32+r13],xmm5
1289         jmp     NEAR $L$cbc_dec_done
1290 ALIGN   16
; two blocks left
1291 $L$cbc_dec_two:
1292         movdqa  XMMWORD[32+rbp],xmm14
1293         call    _bsaes_decrypt8
1294         pxor    xmm15,XMMWORD[32+rbp]
1295         movdqu  xmm7,XMMWORD[r12]
1296         movdqu  xmm14,XMMWORD[16+r12]   ; last ciphertext block -> IV to write back
1297         pxor    xmm0,xmm7
1298         movdqu  XMMWORD[r13],xmm15
1299         movdqu  XMMWORD[16+r13],xmm0
1300         jmp     NEAR $L$cbc_dec_done
1301 ALIGN   16
; One block left: use the single-block routine on the original key
; (r15), writing its result to the [rbp+32] scratch slot.  xmm14 (IV) and
; xmm15 (the ciphertext block loaded above) are used after the call, so
; this relies on asm_AES_decrypt leaving them intact (xmm6-xmm15 are
; callee-saved in the Microsoft x64 convention).
1302 $L$cbc_dec_one:
1303         lea     rcx,[r12]               ; arg1 = input block
1304         lea     rdx,[32+rbp]            ; arg2 = output scratch
1305         lea     r8,[r15]                ; arg3 = key structure
1306         call    asm_AES_decrypt
1307         pxor    xmm14,XMMWORD[32+rbp]   ; plaintext = IV ^ decrypted block
1308         movdqu  XMMWORD[r13],xmm14
1309         movdqa  xmm14,xmm15             ; ciphertext block -> IV to write back
1310
1311 $L$cbc_dec_done:
1312         movdqu  XMMWORD[rbx],xmm14      ; hand the updated IV back to the caller
1313         lea     rax,[rsp]
1314         pxor    xmm0,xmm0
; Zero the stack from rsp up to rbp (the converted key schedule),
; 32 bytes per iteration.
1315 $L$cbc_dec_bzero:
1316         movdqa  XMMWORD[rax],xmm0
1317         movdqa  XMMWORD[16+rax],xmm0
1318         lea     rax,[32+rax]
1319         cmp     rbp,rax
1320         ja      NEAR $L$cbc_dec_bzero
1321
1322         lea     rsp,[rbp]
1323         movaps  xmm6,XMMWORD[64+rbp]
1324         movaps  xmm7,XMMWORD[80+rbp]
1325         movaps  xmm8,XMMWORD[96+rbp]
1326         movaps  xmm9,XMMWORD[112+rbp]
1327         movaps  xmm10,XMMWORD[128+rbp]
1328         movaps  xmm11,XMMWORD[144+rbp]
1329         movaps  xmm12,XMMWORD[160+rbp]
1330         movaps  xmm13,XMMWORD[176+rbp]
1331         movaps  xmm14,XMMWORD[192+rbp]
1332         movaps  xmm15,XMMWORD[208+rbp]
1333         lea     rsp,[160+rbp]
; The pushed registers are reloaded by offset rather than popped; the
; saved rbp travels through rax because rsp is adjusted first.
1334         mov     r15,QWORD[72+rsp]
1335         mov     r14,QWORD[80+rsp]
1336         mov     r13,QWORD[88+rsp]
1337         mov     r12,QWORD[96+rsp]
1338         mov     rbx,QWORD[104+rsp]
1339         mov     rax,QWORD[112+rsp]
1340         lea     rsp,[120+rsp]           ; rsp back to its value at function entry
1341         mov     rbp,rax
1342 $L$cbc_dec_epilogue:
1343         DB      0F3h,0C3h               ;repret
1344
1345
1346 global  bsaes_ctr32_encrypt_blocks
1347
1348 ALIGN   16
; ----------------------------------------------------------------------
; bsaes_ctr32_encrypt_blocks -- counter mode over whole 16-byte blocks,
; Microsoft x64 register usage.
;
; In:   rcx      = input pointer            (kept in r12)
;       rdx      = output pointer           (kept in r13)
;       r8       = number of 16-byte blocks (kept in r14)
;       r9       = key structure; the DWORD at offset 240 is used as the
;                  round count              (kept in r15)
;       [rsp+40] = pointer to the 16-byte counter block (read only)
;
; Fewer than 8 blocks: $L$ctr_enc_short encrypts one counter block at a
; time with asm_AES_encrypt and increments the last 4 bytes of the
; counter as a big-endian 32-bit integer.
; 8 blocks or more: the key is converted once and eight counter blocks
; are encrypted per iteration through _bsaes_encrypt8_bitslice.
;
; Frame (rbp = rsp after the prologue):
;       [rbp+32]        current counter block
;       [rbp+48]        asm_AES_encrypt output (short path only)
;       [rbp+64..223]   saved xmm6-xmm15
;       below rbp       converted key schedule (bulk path only), zeroed
;                       again before returning
; ----------------------------------------------------------------------
1349 bsaes_ctr32_encrypt_blocks:
1350         mov     rax,rsp
1351 $L$ctr_enc_prologue:
1352         push    rbp
1353         push    rbx
1354         push    r12
1355         push    r13
1356         push    r14
1357         push    r15
1358         lea     rsp,[((-72))+rsp]
1359         mov     r10,QWORD[160+rsp]      ; = entry [rsp+40]: 6 pushes (48) + 72 + 40 -> counter pointer
1360         lea     rsp,[((-160))+rsp]
1361         movaps  XMMWORD[64+rsp],xmm6
1362         movaps  XMMWORD[80+rsp],xmm7
1363         movaps  XMMWORD[96+rsp],xmm8
1364         movaps  XMMWORD[112+rsp],xmm9
1365         movaps  XMMWORD[128+rsp],xmm10
1366         movaps  XMMWORD[144+rsp],xmm11
1367         movaps  XMMWORD[160+rsp],xmm12
1368         movaps  XMMWORD[176+rsp],xmm13
1369         movaps  XMMWORD[192+rsp],xmm14
1370         movaps  XMMWORD[208+rsp],xmm15
1371 $L$ctr_enc_body:
1372         mov     rbp,rsp                 ; rbp = frame base
1373         movdqu  xmm0,XMMWORD[r10]       ; caller's counter block
1374         mov     eax,DWORD[240+r9]       ; round count
1375         mov     r12,rcx                 ; r12 = input pointer
1376         mov     r13,rdx                 ; r13 = output pointer
1377         mov     r14,r8                  ; r14 = blocks remaining
1378         mov     r15,r9                  ; r15 = key structure
1379         movdqa  XMMWORD[32+rbp],xmm0    ; working copy of the counter
1380         cmp     r8,8
1381         jb      NEAR $L$ctr_enc_short   ; < 8 blocks: one-at-a-time path, no key conversion
1382
; Reserve rounds*128-96 bytes below the frame for the converted schedule.
1383         mov     ebx,eax                 ; ebx = round count, reloaded into r10d before each call
1384         shl     rax,7
1385         sub     rax,96
1386         sub     rsp,rax
1387
1388         mov     rax,rsp                 ; rax = destination for the converted schedule
1389         mov     rcx,r15
1390         mov     r10d,ebx
1391         call    _bsaes_key_convert
; xmm7 ^= xmm6 is stored where _bsaes_key_convert left rax.
; NOTE(review): presumably the last-round-key fix-up -- confirm against
; _bsaes_key_convert.
1392         pxor    xmm7,xmm6
1393         movdqa  XMMWORD[rax],xmm7
1394
; One-time preparation: the first schedule slot and the counter are both
; byte-shuffled with the mask stored 32 bytes before $L$ADD1 (that table
; is outside this chunk).
1395         movdqa  xmm8,XMMWORD[rsp]
1396         lea     r11,[$L$ADD1]
1397         movdqa  xmm15,XMMWORD[32+rbp]
1398         movdqa  xmm7,XMMWORD[((-32))+r11]
1399 DB      102,68,15,56,0,199              ; pshufb xmm8,xmm7
1400 DB      102,68,15,56,0,255              ; pshufb xmm15,xmm7
1401         movdqa  XMMWORD[rsp],xmm8
1402         jmp     NEAR $L$ctr_enc_loop
1403 ALIGN   16
; ---- bulk loop: eight counter blocks per iteration --------------------
; On entry xmm15 = shuffled counter for the first block of this batch and
; r11 = $L$ADD1.
1404 $L$ctr_enc_loop:
1405         movdqa  XMMWORD[32+rbp],xmm15   ; remember the batch's base counter
; xmm0..xmm6 = base counter + the table entries at $L$ADD1+0..96.
; NOTE(review): presumably increments of 1..7 -- table not visible here.
1406         movdqa  xmm0,xmm15
1407         movdqa  xmm1,xmm15
1408         paddd   xmm0,XMMWORD[r11]
1409         movdqa  xmm2,xmm15
1410         paddd   xmm1,XMMWORD[16+r11]
1411         movdqa  xmm3,xmm15
1412         paddd   xmm2,XMMWORD[32+r11]
1413         movdqa  xmm4,xmm15
1414         paddd   xmm3,XMMWORD[48+r11]
1415         movdqa  xmm5,xmm15
1416         paddd   xmm4,XMMWORD[64+r11]
1417         movdqa  xmm6,xmm15
1418         paddd   xmm5,XMMWORD[80+r11]
1419         paddd   xmm6,XMMWORD[96+r11]
1420
1421
1422
; Same first-slot XOR + byte-shuffle sequence as the head of
; _bsaes_encrypt8, done inline with the mask 16 bytes before $L$ADD1 so
; the call below can enter at _bsaes_encrypt8_bitslice.
1423         movdqa  xmm8,XMMWORD[rsp]
1424         lea     rax,[16+rsp]            ; rax = second schedule slot onward
1425         movdqa  xmm7,XMMWORD[((-16))+r11]
1426         pxor    xmm15,xmm8
1427         pxor    xmm0,xmm8
1428         pxor    xmm1,xmm8
1429         pxor    xmm2,xmm8
1430 DB      102,68,15,56,0,255              ; pshufb xmm15,xmm7
1431 DB      102,15,56,0,199                 ; pshufb xmm0,xmm7
1432         pxor    xmm3,xmm8
1433         pxor    xmm4,xmm8
1434 DB      102,15,56,0,207                 ; pshufb xmm1,xmm7
1435 DB      102,15,56,0,215                 ; pshufb xmm2,xmm7
1436         pxor    xmm5,xmm8
1437         pxor    xmm6,xmm8
1438 DB      102,15,56,0,223                 ; pshufb xmm3,xmm7
1439 DB      102,15,56,0,231                 ; pshufb xmm4,xmm7
1440 DB      102,15,56,0,239                 ; pshufb xmm5,xmm7
1441 DB      102,15,56,0,247                 ; pshufb xmm6,xmm7
1442         lea     r11,[$L$BS0]            ; constant-table base used by _bsaes_encrypt8_bitslice
1443         mov     r10d,ebx                ; r10d = round count
1444
1445         call    _bsaes_encrypt8_bitslice
1446
1447         sub     r14,8
1448         jc      NEAR $L$ctr_enc_loop_done       ; borrow: fewer than 8 blocks were left
1449
; Full batch: XOR the eight keystream blocks with the input and store.
; Keystream register order is xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4.
1450         movdqu  xmm7,XMMWORD[r12]
1451         movdqu  xmm8,XMMWORD[16+r12]
1452         movdqu  xmm9,XMMWORD[32+r12]
1453         movdqu  xmm10,XMMWORD[48+r12]
1454         movdqu  xmm11,XMMWORD[64+r12]
1455         movdqu  xmm12,XMMWORD[80+r12]
1456         movdqu  xmm13,XMMWORD[96+r12]
1457         movdqu  xmm14,XMMWORD[112+r12]
1458         lea     r12,[128+r12]
1459         pxor    xmm7,xmm15
1460         movdqa  xmm15,XMMWORD[32+rbp]   ; reload the batch's base counter
1461         pxor    xmm0,xmm8
1462         movdqu  XMMWORD[r13],xmm7
1463         pxor    xmm3,xmm9
1464         movdqu  XMMWORD[16+r13],xmm0
1465         pxor    xmm5,xmm10
1466         movdqu  XMMWORD[32+r13],xmm3
1467         pxor    xmm2,xmm11
1468         movdqu  XMMWORD[48+r13],xmm5
1469         pxor    xmm6,xmm12
1470         movdqu  XMMWORD[64+r13],xmm2
1471         pxor    xmm1,xmm13
1472         movdqu  XMMWORD[80+r13],xmm6
1473         pxor    xmm4,xmm14
1474         movdqu  XMMWORD[96+r13],xmm1
1475         lea     r11,[$L$ADD1]           ; r11 was repointed at $L$BS0 for the call
1476         movdqu  XMMWORD[112+r13],xmm4
1477         lea     r13,[128+r13]
1478         paddd   xmm15,XMMWORD[112+r11]  ; advance the base counter (presumably by 8 -- table not visible)
1479         jnz     NEAR $L$ctr_enc_loop    ; flags still from "sub r14,8": nothing in between writes them
1480
1481         jmp     NEAR $L$ctr_enc_done
1482 ALIGN   16
; Partial batch: r14 (after the add) = 1..7 blocks to emit from the
; keystream already computed.  movdqu/pxor do not modify flags, so each
; je reuses the flags of the cmp before it.
1483 $L$ctr_enc_loop_done:
1484         add     r14,8
1485         movdqu  xmm7,XMMWORD[r12]
1486         pxor    xmm15,xmm7
1487         movdqu  XMMWORD[r13],xmm15
1488         cmp     r14,2
1489         jb      NEAR $L$ctr_enc_done
1490         movdqu  xmm8,XMMWORD[16+r12]
1491         pxor    xmm0,xmm8
1492         movdqu  XMMWORD[16+r13],xmm0
1493         je      NEAR $L$ctr_enc_done
1494         movdqu  xmm9,XMMWORD[32+r12]
1495         pxor    xmm3,xmm9
1496         movdqu  XMMWORD[32+r13],xmm3
1497         cmp     r14,4
1498         jb      NEAR $L$ctr_enc_done
1499         movdqu  xmm10,XMMWORD[48+r12]
1500         pxor    xmm5,xmm10
1501         movdqu  XMMWORD[48+r13],xmm5
1502         je      NEAR $L$ctr_enc_done
1503         movdqu  xmm11,XMMWORD[64+r12]
1504         pxor    xmm2,xmm11
1505         movdqu  XMMWORD[64+r13],xmm2
1506         cmp     r14,6
1507         jb      NEAR $L$ctr_enc_done
1508         movdqu  xmm12,XMMWORD[80+r12]
1509         pxor    xmm6,xmm12
1510         movdqu  XMMWORD[80+r13],xmm6
1511         je      NEAR $L$ctr_enc_done
1512         movdqu  xmm13,XMMWORD[96+r12]
1513         pxor    xmm1,xmm13
1514         movdqu  XMMWORD[96+r13],xmm1
1515         jmp     NEAR $L$ctr_enc_done
1516
1517 ALIGN   16
; ---- short path: one block per iteration ------------------------------
; Reached only from the "cmp r8,8 / jb" above, before any stack was
; reserved for a key schedule, so rsp == rbp throughout this loop.
1518 $L$ctr_enc_short:
1519         lea     rcx,[32+rbp]            ; arg1 = counter block
1520         lea     rdx,[48+rbp]            ; arg2 = keystream output
1521         lea     r8,[r15]                ; arg3 = key structure
1522         call    asm_AES_encrypt
1523         movdqu  xmm0,XMMWORD[r12]
1524         lea     r12,[16+r12]
1525         mov     eax,DWORD[44+rbp]       ; last 4 bytes of the counter block
1526         bswap   eax                     ; big-endian -> host order
1527         pxor    xmm0,XMMWORD[48+rbp]    ; input ^ keystream
1528         inc     eax
1529         movdqu  XMMWORD[r13],xmm0
1530         bswap   eax
1531         lea     r13,[16+r13]
1532         mov     DWORD[44+rsp],eax       ; same slot as [44+rbp] (rsp == rbp here)
1533         dec     r14
1534         jnz     NEAR $L$ctr_enc_short
1535
1536 $L$ctr_enc_done:
1537         lea     rax,[rsp]
1538         pxor    xmm0,xmm0
; Zero the stack from rsp up to rbp, 32 bytes per iteration.  On the
; short path rsp == rbp, so exactly one iteration runs and clears
; [rbp..rbp+31].
1539 $L$ctr_enc_bzero:
1540         movdqa  XMMWORD[rax],xmm0
1541         movdqa  XMMWORD[16+rax],xmm0
1542         lea     rax,[32+rax]
1543         cmp     rbp,rax
1544         ja      NEAR $L$ctr_enc_bzero
1545
1546         lea     rsp,[rbp]
1547         movaps  xmm6,XMMWORD[64+rbp]
1548         movaps  xmm7,XMMWORD[80+rbp]
1549         movaps  xmm8,XMMWORD[96+rbp]
1550         movaps  xmm9,XMMWORD[112+rbp]
1551         movaps  xmm10,XMMWORD[128+rbp]
1552         movaps  xmm11,XMMWORD[144+rbp]
1553         movaps  xmm12,XMMWORD[160+rbp]
1554         movaps  xmm13,XMMWORD[176+rbp]
1555         movaps  xmm14,XMMWORD[192+rbp]
1556         movaps  xmm15,XMMWORD[208+rbp]
1557         lea     rsp,[160+rbp]
; The pushed registers are reloaded by offset rather than popped; the
; saved rbp travels through rax because rsp is adjusted first.
1558         mov     r15,QWORD[72+rsp]
1559         mov     r14,QWORD[80+rsp]
1560         mov     r13,QWORD[88+rsp]
1561         mov     r12,QWORD[96+rsp]
1562         mov     rbx,QWORD[104+rsp]
1563         mov     rax,QWORD[112+rsp]
1564         lea     rsp,[120+rsp]           ; rsp back to its value at function entry
1565         mov     rbp,rax
1566 $L$ctr_enc_epilogue:
1567         DB      0F3h,0C3h               ;repret
1568
1569 global  bsaes_xts_encrypt
1570
1571 ALIGN   16
; ----------------------------------------------------------------------
; bsaes_xts_encrypt -- XTS-style encryption with per-block tweaks and
; ciphertext stealing for a trailing partial block, Microsoft x64
; register usage.
;
; In:   rcx      = input pointer            (kept in r12)
;       rdx      = output pointer           (kept in r13)
;       r8       = length in bytes          (rbx keeps the full length,
;                                            r14 the length rounded down
;                                            to a multiple of 16)
;       r9       = key structure for the data blocks; the DWORD at offset
;                  240 is used as the round count (kept in r15)
;       [rsp+40] = key structure used once, to encrypt the initial tweak
;       [rsp+48] = pointer to the 16-byte block that is encrypted to form
;                  the initial tweak
;
; Frame (rbp = rsp after the prologue):
;       [rbp+32]        16-byte scratch: initial tweak, then in/out buffer
;                       for the single-block asm_AES_encrypt calls
;       [rbp+64..223]   saved xmm6-xmm15
;       below rbp       converted key schedule (rounds*128-96 bytes) and,
;                       below that, 128 bytes holding the eight tweaks of
;                       the current batch at [rsp+0..127]; all zeroed
;                       before returning
;
; Tweak chain: xmm6 holds the running tweak.  Each step saves xmm6, then
; forms the next tweak with paddq (doubling both 64-bit halves) and a
; pxor of the dword sign mask shuffled by pshufd 0x13 and ANDed with
; $L$xts_magic.
; NOTE(review): this looks like multiplication by x in GF(2^128); the
; $L$xts_magic constant is outside this chunk -- confirm.
;
; _bsaes_encrypt8 takes its blocks in xmm15,xmm0..xmm6 and, as the store
; order below shows, returns them in xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,
; xmm1,xmm4.
; ----------------------------------------------------------------------
1572 bsaes_xts_encrypt:
1573         mov     rax,rsp
1574 $L$xts_enc_prologue:
1575         push    rbp
1576         push    rbx
1577         push    r12
1578         push    r13
1579         push    r14
1580         push    r15
1581         lea     rsp,[((-72))+rsp]
1582         mov     r10,QWORD[160+rsp]      ; = entry [rsp+40]: key used for the initial tweak
1583         mov     r11,QWORD[168+rsp]      ; = entry [rsp+48]: block to encrypt into the initial tweak
1584         lea     rsp,[((-160))+rsp]
1585         movaps  XMMWORD[64+rsp],xmm6
1586         movaps  XMMWORD[80+rsp],xmm7
1587         movaps  XMMWORD[96+rsp],xmm8
1588         movaps  XMMWORD[112+rsp],xmm9
1589         movaps  XMMWORD[128+rsp],xmm10
1590         movaps  XMMWORD[144+rsp],xmm11
1591         movaps  XMMWORD[160+rsp],xmm12
1592         movaps  XMMWORD[176+rsp],xmm13
1593         movaps  XMMWORD[192+rsp],xmm14
1594         movaps  XMMWORD[208+rsp],xmm15
1595 $L$xts_enc_body:
1596         mov     rbp,rsp                 ; rbp = frame base
1597         mov     r12,rcx                 ; r12 = input pointer
1598         mov     r13,rdx                 ; r13 = output pointer
1599         mov     r14,r8                  ; r14 = length in bytes
1600         mov     r15,r9                  ; r15 = data key structure
1601
; Initial tweak = asm_AES_encrypt(block at r11, key at r10) -> [rbp+32].
1602         lea     rcx,[r11]
1603         lea     rdx,[32+rbp]
1604         lea     r8,[r10]
1605         call    asm_AES_encrypt
1606
1607         mov     eax,DWORD[240+r15]      ; round count of the data key
1608         mov     rbx,r14                 ; rbx = full byte length (low 4 bits used for stealing)
1609
; Reserve rounds*128-96 bytes below the frame for the converted schedule.
1610         mov     edx,eax                 ; edx = round count, reloaded into r10d before each call
1611         shl     rax,7
1612         sub     rax,96
1613         sub     rsp,rax
1614
1615         mov     rax,rsp                 ; rax = destination for the converted schedule
1616         mov     rcx,r15
1617         mov     r10d,edx
1618         call    _bsaes_key_convert
; xmm7 ^= xmm6 is stored where _bsaes_key_convert left rax.
; NOTE(review): presumably the last-round-key fix-up -- confirm against
; _bsaes_key_convert.
1619         pxor    xmm7,xmm6
1620         movdqa  XMMWORD[rax],xmm7
1621
1622         and     r14,-16                 ; whole blocks only; the tail is handled at $L$xts_enc_done
1623         sub     rsp,0x80                ; 128 bytes for the eight tweaks of a batch
1624         movdqa  xmm6,XMMWORD[32+rbp]    ; xmm6 = running tweak
1625
1626         pxor    xmm14,xmm14
1627         movdqa  xmm12,XMMWORD[$L$xts_magic]
1628         pcmpgtd xmm14,xmm6              ; xmm14 = per-dword sign mask of the tweak
1629
1630         sub     r14,0x80                ; bias by 128 so a borrow means "< 8 blocks left"
1631         jc      NEAR $L$xts_enc_short
1632         jmp     NEAR $L$xts_enc_loop
1633
1634 ALIGN   16
; ---- main loop: eight blocks per iteration ----------------------------
; Tweak i is saved at [rsp+16*i] and copied into the register that will
; carry block i; the input loads and input^tweak XORs are interleaved
; with the tweak chain.
1635 $L$xts_enc_loop:
1636         pshufd  xmm13,xmm14,0x13
1637         pxor    xmm14,xmm14
1638         movdqa  xmm15,xmm6              ; tweak 0
1639         movdqa  XMMWORD[rsp],xmm6
1640         paddq   xmm6,xmm6
1641         pand    xmm13,xmm12
1642         pcmpgtd xmm14,xmm6
1643         pxor    xmm6,xmm13
1644         pshufd  xmm13,xmm14,0x13
1645         pxor    xmm14,xmm14
1646         movdqa  xmm0,xmm6               ; tweak 1
1647         movdqa  XMMWORD[16+rsp],xmm6
1648         paddq   xmm6,xmm6
1649         pand    xmm13,xmm12
1650         pcmpgtd xmm14,xmm6
1651         pxor    xmm6,xmm13
1652         movdqu  xmm7,XMMWORD[r12]
1653         pshufd  xmm13,xmm14,0x13
1654         pxor    xmm14,xmm14
1655         movdqa  xmm1,xmm6               ; tweak 2
1656         movdqa  XMMWORD[32+rsp],xmm6
1657         paddq   xmm6,xmm6
1658         pand    xmm13,xmm12
1659         pcmpgtd xmm14,xmm6
1660         pxor    xmm6,xmm13
1661         movdqu  xmm8,XMMWORD[16+r12]
1662         pxor    xmm15,xmm7
1663         pshufd  xmm13,xmm14,0x13
1664         pxor    xmm14,xmm14
1665         movdqa  xmm2,xmm6               ; tweak 3
1666         movdqa  XMMWORD[48+rsp],xmm6
1667         paddq   xmm6,xmm6
1668         pand    xmm13,xmm12
1669         pcmpgtd xmm14,xmm6
1670         pxor    xmm6,xmm13
1671         movdqu  xmm9,XMMWORD[32+r12]
1672         pxor    xmm0,xmm8
1673         pshufd  xmm13,xmm14,0x13
1674         pxor    xmm14,xmm14
1675         movdqa  xmm3,xmm6               ; tweak 4
1676         movdqa  XMMWORD[64+rsp],xmm6
1677         paddq   xmm6,xmm6
1678         pand    xmm13,xmm12
1679         pcmpgtd xmm14,xmm6
1680         pxor    xmm6,xmm13
1681         movdqu  xmm10,XMMWORD[48+r12]
1682         pxor    xmm1,xmm9
1683         pshufd  xmm13,xmm14,0x13
1684         pxor    xmm14,xmm14
1685         movdqa  xmm4,xmm6               ; tweak 5
1686         movdqa  XMMWORD[80+rsp],xmm6
1687         paddq   xmm6,xmm6
1688         pand    xmm13,xmm12
1689         pcmpgtd xmm14,xmm6
1690         pxor    xmm6,xmm13
1691         movdqu  xmm11,XMMWORD[64+r12]
1692         pxor    xmm2,xmm10
1693         pshufd  xmm13,xmm14,0x13
1694         pxor    xmm14,xmm14
1695         movdqa  xmm5,xmm6               ; tweak 6
1696         movdqa  XMMWORD[96+rsp],xmm6
1697         paddq   xmm6,xmm6
1698         pand    xmm13,xmm12
1699         pcmpgtd xmm14,xmm6
1700         pxor    xmm6,xmm13
1701         movdqu  xmm12,XMMWORD[80+r12]   ; xmm12 (magic) is reused for input; reloaded after the call
1702         pxor    xmm3,xmm11
1703         movdqu  xmm13,XMMWORD[96+r12]
1704         pxor    xmm4,xmm12
1705         movdqu  xmm14,XMMWORD[112+r12]
1706         lea     r12,[128+r12]
1707         movdqa  XMMWORD[112+rsp],xmm6   ; tweak 7
1708         pxor    xmm5,xmm13
1709         lea     rax,[128+rsp]           ; rax = key schedule (sits above the tweak area)
1710         pxor    xmm6,xmm14
1711         mov     r10d,edx                ; r10d = round count
1712
1713         call    _bsaes_encrypt8
1714
; XOR each result with its saved tweak and store.
1715         pxor    xmm15,XMMWORD[rsp]
1716         pxor    xmm0,XMMWORD[16+rsp]
1717         movdqu  XMMWORD[r13],xmm15
1718         pxor    xmm3,XMMWORD[32+rsp]
1719         movdqu  XMMWORD[16+r13],xmm0
1720         pxor    xmm5,XMMWORD[48+rsp]
1721         movdqu  XMMWORD[32+r13],xmm3
1722         pxor    xmm2,XMMWORD[64+rsp]
1723         movdqu  XMMWORD[48+r13],xmm5
1724         pxor    xmm6,XMMWORD[80+rsp]
1725         movdqu  XMMWORD[64+r13],xmm2
1726         pxor    xmm1,XMMWORD[96+rsp]
1727         movdqu  XMMWORD[80+r13],xmm6
1728         pxor    xmm4,XMMWORD[112+rsp]
1729         movdqu  XMMWORD[96+r13],xmm1
1730         movdqu  XMMWORD[112+r13],xmm4
1731         lea     r13,[128+r13]
1732
; Restart the tweak chain from tweak 7 and step it once so xmm6 is the
; first tweak of the next batch.
1733         movdqa  xmm6,XMMWORD[112+rsp]
1734         pxor    xmm14,xmm14
1735         movdqa  xmm12,XMMWORD[$L$xts_magic]
1736         pcmpgtd xmm14,xmm6
1737         pshufd  xmm13,xmm14,0x13
1738         pxor    xmm14,xmm14
1739         paddq   xmm6,xmm6
1740         pand    xmm13,xmm12
1741         pcmpgtd xmm14,xmm6
1742         pxor    xmm6,xmm13
1743
1744         sub     r14,0x80
1745         jnc     NEAR $L$xts_enc_loop    ; no borrow: at least 8 more whole blocks
1746
; ---- short path: 0..7 whole blocks left -------------------------------
; Tweaks are generated and inputs loaded incrementally, branching out as
; soon as the byte count matches.  Each input^tweak XOR is issued one
; step late, so every $L$xts_enc_N target begins by applying the XORs
; that were still pending when the branch was taken.
1747 $L$xts_enc_short:
1748         add     r14,0x80                ; undo the bias: r14 = 0,16,...,112 bytes
1749         jz      NEAR $L$xts_enc_done
1750         pshufd  xmm13,xmm14,0x13
1751         pxor    xmm14,xmm14
1752         movdqa  xmm15,xmm6              ; tweak 0
1753         movdqa  XMMWORD[rsp],xmm6
1754         paddq   xmm6,xmm6
1755         pand    xmm13,xmm12
1756         pcmpgtd xmm14,xmm6
1757         pxor    xmm6,xmm13
1758         pshufd  xmm13,xmm14,0x13
1759         pxor    xmm14,xmm14
1760         movdqa  xmm0,xmm6               ; tweak 1
1761         movdqa  XMMWORD[16+rsp],xmm6
1762         paddq   xmm6,xmm6
1763         pand    xmm13,xmm12
1764         pcmpgtd xmm14,xmm6
1765         pxor    xmm6,xmm13
1766         movdqu  xmm7,XMMWORD[r12]
1767         cmp     r14,16
1768         je      NEAR $L$xts_enc_1
1769         pshufd  xmm13,xmm14,0x13
1770         pxor    xmm14,xmm14
1771         movdqa  xmm1,xmm6               ; tweak 2
1772         movdqa  XMMWORD[32+rsp],xmm6
1773         paddq   xmm6,xmm6
1774         pand    xmm13,xmm12
1775         pcmpgtd xmm14,xmm6
1776         pxor    xmm6,xmm13
1777         movdqu  xmm8,XMMWORD[16+r12]
1778         cmp     r14,32
1779         je      NEAR $L$xts_enc_2
1780         pxor    xmm15,xmm7
1781         pshufd  xmm13,xmm14,0x13
1782         pxor    xmm14,xmm14
1783         movdqa  xmm2,xmm6               ; tweak 3
1784         movdqa  XMMWORD[48+rsp],xmm6
1785         paddq   xmm6,xmm6
1786         pand    xmm13,xmm12
1787         pcmpgtd xmm14,xmm6
1788         pxor    xmm6,xmm13
1789         movdqu  xmm9,XMMWORD[32+r12]
1790         cmp     r14,48
1791         je      NEAR $L$xts_enc_3
1792         pxor    xmm0,xmm8
1793         pshufd  xmm13,xmm14,0x13
1794         pxor    xmm14,xmm14
1795         movdqa  xmm3,xmm6               ; tweak 4
1796         movdqa  XMMWORD[64+rsp],xmm6
1797         paddq   xmm6,xmm6
1798         pand    xmm13,xmm12
1799         pcmpgtd xmm14,xmm6
1800         pxor    xmm6,xmm13
1801         movdqu  xmm10,XMMWORD[48+r12]
1802         cmp     r14,64
1803         je      NEAR $L$xts_enc_4
1804         pxor    xmm1,xmm9
1805         pshufd  xmm13,xmm14,0x13
1806         pxor    xmm14,xmm14
1807         movdqa  xmm4,xmm6               ; tweak 5
1808         movdqa  XMMWORD[80+rsp],xmm6
1809         paddq   xmm6,xmm6
1810         pand    xmm13,xmm12
1811         pcmpgtd xmm14,xmm6
1812         pxor    xmm6,xmm13
1813         movdqu  xmm11,XMMWORD[64+r12]
1814         cmp     r14,80
1815         je      NEAR $L$xts_enc_5
1816         pxor    xmm2,xmm10
1817         pshufd  xmm13,xmm14,0x13
1818         pxor    xmm14,xmm14
1819         movdqa  xmm5,xmm6               ; tweak 6
1820         movdqa  XMMWORD[96+rsp],xmm6
1821         paddq   xmm6,xmm6
1822         pand    xmm13,xmm12
1823         pcmpgtd xmm14,xmm6
1824         pxor    xmm6,xmm13
1825         movdqu  xmm12,XMMWORD[80+r12]
1826         cmp     r14,96
1827         je      NEAR $L$xts_enc_6
; seven whole blocks
1828         pxor    xmm3,xmm11
1829         movdqu  xmm13,XMMWORD[96+r12]
1830         pxor    xmm4,xmm12
1831         movdqa  XMMWORD[112+rsp],xmm6   ; tweak 7: unused by a block, kept for stealing
1832         lea     r12,[112+r12]
1833         pxor    xmm5,xmm13
1834         lea     rax,[128+rsp]
1835         mov     r10d,edx
1836
1837         call    _bsaes_encrypt8
1838
1839         pxor    xmm15,XMMWORD[rsp]
1840         pxor    xmm0,XMMWORD[16+rsp]
1841         movdqu  XMMWORD[r13],xmm15
1842         pxor    xmm3,XMMWORD[32+rsp]
1843         movdqu  XMMWORD[16+r13],xmm0
1844         pxor    xmm5,XMMWORD[48+rsp]
1845         movdqu  XMMWORD[32+r13],xmm3
1846         pxor    xmm2,XMMWORD[64+rsp]
1847         movdqu  XMMWORD[48+r13],xmm5
1848         pxor    xmm6,XMMWORD[80+rsp]
1849         movdqu  XMMWORD[64+r13],xmm2
1850         pxor    xmm1,XMMWORD[96+rsp]
1851         movdqu  XMMWORD[80+r13],xmm6
1852         movdqu  XMMWORD[96+r13],xmm1
1853         lea     r13,[112+r13]
1854
1855         movdqa  xmm6,XMMWORD[112+rsp]   ; xmm6 = next unused tweak, for $L$xts_enc_done
1856         jmp     NEAR $L$xts_enc_done
1857 ALIGN   16
; six whole blocks
1858 $L$xts_enc_6:
1859         pxor    xmm3,xmm11
1860         lea     r12,[96+r12]
1861         pxor    xmm4,xmm12
1862         lea     rax,[128+rsp]
1863         mov     r10d,edx
1864
1865         call    _bsaes_encrypt8
1866
1867         pxor    xmm15,XMMWORD[rsp]
1868         pxor    xmm0,XMMWORD[16+rsp]
1869         movdqu  XMMWORD[r13],xmm15
1870         pxor    xmm3,XMMWORD[32+rsp]
1871         movdqu  XMMWORD[16+r13],xmm0
1872         pxor    xmm5,XMMWORD[48+rsp]
1873         movdqu  XMMWORD[32+r13],xmm3
1874         pxor    xmm2,XMMWORD[64+rsp]
1875         movdqu  XMMWORD[48+r13],xmm5
1876         pxor    xmm6,XMMWORD[80+rsp]
1877         movdqu  XMMWORD[64+r13],xmm2
1878         movdqu  XMMWORD[80+r13],xmm6
1879         lea     r13,[96+r13]
1880
1881         movdqa  xmm6,XMMWORD[96+rsp]    ; xmm6 = next unused tweak
1882         jmp     NEAR $L$xts_enc_done
1883 ALIGN   16
; five whole blocks
1884 $L$xts_enc_5:
1885         pxor    xmm2,xmm10
1886         lea     r12,[80+r12]
1887         pxor    xmm3,xmm11
1888         lea     rax,[128+rsp]
1889         mov     r10d,edx
1890
1891         call    _bsaes_encrypt8
1892
1893         pxor    xmm15,XMMWORD[rsp]
1894         pxor    xmm0,XMMWORD[16+rsp]
1895         movdqu  XMMWORD[r13],xmm15
1896         pxor    xmm3,XMMWORD[32+rsp]
1897         movdqu  XMMWORD[16+r13],xmm0
1898         pxor    xmm5,XMMWORD[48+rsp]
1899         movdqu  XMMWORD[32+r13],xmm3
1900         pxor    xmm2,XMMWORD[64+rsp]
1901         movdqu  XMMWORD[48+r13],xmm5
1902         movdqu  XMMWORD[64+r13],xmm2
1903         lea     r13,[80+r13]
1904
1905         movdqa  xmm6,XMMWORD[80+rsp]    ; xmm6 = next unused tweak
1906         jmp     NEAR $L$xts_enc_done
1907 ALIGN   16
; four whole blocks
1908 $L$xts_enc_4:
1909         pxor    xmm1,xmm9
1910         lea     r12,[64+r12]
1911         pxor    xmm2,xmm10
1912         lea     rax,[128+rsp]
1913         mov     r10d,edx
1914
1915         call    _bsaes_encrypt8
1916
1917         pxor    xmm15,XMMWORD[rsp]
1918         pxor    xmm0,XMMWORD[16+rsp]
1919         movdqu  XMMWORD[r13],xmm15
1920         pxor    xmm3,XMMWORD[32+rsp]
1921         movdqu  XMMWORD[16+r13],xmm0
1922         pxor    xmm5,XMMWORD[48+rsp]
1923         movdqu  XMMWORD[32+r13],xmm3
1924         movdqu  XMMWORD[48+r13],xmm5
1925         lea     r13,[64+r13]
1926
1927         movdqa  xmm6,XMMWORD[64+rsp]    ; xmm6 = next unused tweak
1928         jmp     NEAR $L$xts_enc_done
1929 ALIGN   16
; three whole blocks
1930 $L$xts_enc_3:
1931         pxor    xmm0,xmm8
1932         lea     r12,[48+r12]
1933         pxor    xmm1,xmm9
1934         lea     rax,[128+rsp]
1935         mov     r10d,edx
1936
1937         call    _bsaes_encrypt8
1938
1939         pxor    xmm15,XMMWORD[rsp]
1940         pxor    xmm0,XMMWORD[16+rsp]
1941         movdqu  XMMWORD[r13],xmm15
1942         pxor    xmm3,XMMWORD[32+rsp]
1943         movdqu  XMMWORD[16+r13],xmm0
1944         movdqu  XMMWORD[32+r13],xmm3
1945         lea     r13,[48+r13]
1946
1947         movdqa  xmm6,XMMWORD[48+rsp]    ; xmm6 = next unused tweak
1948         jmp     NEAR $L$xts_enc_done
1949 ALIGN   16
; two whole blocks
1950 $L$xts_enc_2:
1951         pxor    xmm15,xmm7
1952         lea     r12,[32+r12]
1953         pxor    xmm0,xmm8
1954         lea     rax,[128+rsp]
1955         mov     r10d,edx
1956
1957         call    _bsaes_encrypt8
1958
1959         pxor    xmm15,XMMWORD[rsp]
1960         pxor    xmm0,XMMWORD[16+rsp]
1961         movdqu  XMMWORD[r13],xmm15
1962         movdqu  XMMWORD[16+r13],xmm0
1963         lea     r13,[32+r13]
1964
1965         movdqa  xmm6,XMMWORD[32+rsp]    ; xmm6 = next unused tweak
1966         jmp     NEAR $L$xts_enc_done
1967 ALIGN   16
; One whole block: use the single-block routine in place on the
; [rbp+32] scratch slot with the original data key (r15).  xmm15 (the
; tweak) is used after the call, so this relies on asm_AES_encrypt
; leaving it intact (xmm6-xmm15 are callee-saved in the Microsoft x64
; convention).
1968 $L$xts_enc_1:
1969         pxor    xmm7,xmm15              ; input ^ tweak
1970         lea     r12,[16+r12]
1971         movdqa  XMMWORD[32+rbp],xmm7
1972         lea     rcx,[32+rbp]
1973         lea     rdx,[32+rbp]
1974         lea     r8,[r15]
1975         call    asm_AES_encrypt
1976         pxor    xmm15,XMMWORD[32+rbp]   ; tweak ^ encrypted block
1977
1978
1979
1980
1981
1982         movdqu  XMMWORD[r13],xmm15
1983         lea     r13,[16+r13]
1984
1985         movdqa  xmm6,XMMWORD[16+rsp]    ; xmm6 = next unused tweak
1986
; ---- trailing partial block (ciphertext stealing) ---------------------
; On entry: r12/r13 point just past the last whole block of input and
; output, xmm6 = next unused tweak, rbx = original byte length.
1987 $L$xts_enc_done:
1988         and     ebx,15                  ; ebx = number of trailing bytes
1989         jz      NEAR $L$xts_enc_ret
1990         mov     rdx,r13                 ; rdx walks the output tail (the round count in edx is no longer needed)
1991
; Per trailing byte: the byte of the last ciphertext block moves out to
; the tail position, and the trailing input byte takes its place.
1992 $L$xts_enc_steal:
1993         movzx   eax,BYTE[r12]
1994         movzx   ecx,BYTE[((-16))+rdx]
1995         lea     r12,[1+r12]
1996         mov     BYTE[((-16))+rdx],al
1997         mov     BYTE[rdx],cl
1998         lea     rdx,[1+rdx]
1999         sub     ebx,1
2000         jnz     NEAR $L$xts_enc_steal
2001
; Re-encrypt the patched last block in place with tweak xmm6; xmm6 must
; survive the asm_AES_encrypt call (callee-saved in the Microsoft x64
; convention).
2002         movdqu  xmm15,XMMWORD[((-16))+r13]
2003         lea     rcx,[32+rbp]
2004         pxor    xmm15,xmm6
2005         lea     rdx,[32+rbp]
2006         movdqa  XMMWORD[32+rbp],xmm15
2007         lea     r8,[r15]
2008         call    asm_AES_encrypt
2009         pxor    xmm6,XMMWORD[32+rbp]
2010         movdqu  XMMWORD[(-16)+r13],xmm6
2011
2012 $L$xts_enc_ret:
2013         lea     rax,[rsp]
2014         pxor    xmm0,xmm0
; Zero the stack from rsp up to rbp (tweak area and converted key
; schedule), 32 bytes per iteration.
2015 $L$xts_enc_bzero:
2016         movdqa  XMMWORD[rax],xmm0
2017         movdqa  XMMWORD[16+rax],xmm0
2018         lea     rax,[32+rax]
2019         cmp     rbp,rax
2020         ja      NEAR $L$xts_enc_bzero
2021
2022         lea     rsp,[rbp]
2023         movaps  xmm6,XMMWORD[64+rbp]
2024         movaps  xmm7,XMMWORD[80+rbp]
2025         movaps  xmm8,XMMWORD[96+rbp]
2026         movaps  xmm9,XMMWORD[112+rbp]
2027         movaps  xmm10,XMMWORD[128+rbp]
2028         movaps  xmm11,XMMWORD[144+rbp]
2029         movaps  xmm12,XMMWORD[160+rbp]
2030         movaps  xmm13,XMMWORD[176+rbp]
2031         movaps  xmm14,XMMWORD[192+rbp]
2032         movaps  xmm15,XMMWORD[208+rbp]
2033         lea     rsp,[160+rbp]
; The pushed registers are reloaded by offset rather than popped; the
; saved rbp travels through rax because rsp is adjusted first.
2034         mov     r15,QWORD[72+rsp]
2035         mov     r14,QWORD[80+rsp]
2036         mov     r13,QWORD[88+rsp]
2037         mov     r12,QWORD[96+rsp]
2038         mov     rbx,QWORD[104+rsp]
2039         mov     rax,QWORD[112+rsp]
2040         lea     rsp,[120+rsp]           ; rsp back to its value at function entry
2041         mov     rbp,rax
2042 $L$xts_enc_epilogue:
2043         DB      0F3h,0C3h               ;repret
2044
2045
; ---------------------------------------------------------------------------
; bsaes_xts_decrypt -- exported XTS decryption entry point (Microsoft x64 ABI).
;
; Register arguments are parked in callee-saved registers for the whole call:
;   rcx -> r12   source pointer       (blocks are loaded from [r12+...])
;   rdx -> r13   destination pointer  (results are stored to [r13+...])
;   r8  -> r14   byte length          (also copied to rbx to keep len & 15)
;   r9  -> r15   data key             (round count is read from [r15+240])
; The 5th and 6th arguments are fetched from the caller's stack into r10/r11
; and are only used by the asm_AES_encrypt call that produces the initial
; tweak (presumably the tweak key and the IV/sector value -- confirm against
; the C prototype, which is not part of this file).
;
; Frame after the prologue (rbp = rsp at $L$xts_dec_body = entry rsp - 280):
;   [rbp+32 .. rbp+47]    16-byte scratch block
;   [rbp+64 .. rbp+223]   saved xmm6..xmm15
;   [rbp+232.. rbp+279]   saved r15, r14, r13, r12, rbx, rbp
; Below rbp: the converted key schedule, then 128 bytes holding eight tweaks.
; ---------------------------------------------------------------------------
2046 global  bsaes_xts_decrypt
2047
2048 ALIGN   16
2049 bsaes_xts_decrypt:
2050         mov     rax,rsp                 ; keep entry rsp in rax; se_handler uses context->Rax when Rip is before the body label
2051 $L$xts_dec_prologue:
2052         push    rbp
2053         push    rbx
2054         push    r12
2055         push    r13
2056         push    r14
2057         push    r15
2058         lea     rsp,[((-72))+rsp]       ; rsp = entry rsp - 120 (48 bytes of pushes + 72)
2059         mov     r10,QWORD[160+rsp]      ; [entry rsp + 40]: 5th argument
2060         mov     r11,QWORD[168+rsp]      ; [entry rsp + 48]: 6th argument
2061         lea     rsp,[((-160))+rsp]      ; rsp = entry rsp - 280, 16-byte aligned for movaps
2062         movaps  XMMWORD[64+rsp],xmm6    ; xmm6..xmm15 are callee-saved on Win64
2063         movaps  XMMWORD[80+rsp],xmm7
2064         movaps  XMMWORD[96+rsp],xmm8
2065         movaps  XMMWORD[112+rsp],xmm9
2066         movaps  XMMWORD[128+rsp],xmm10
2067         movaps  XMMWORD[144+rsp],xmm11
2068         movaps  XMMWORD[160+rsp],xmm12
2069         movaps  XMMWORD[176+rsp],xmm13
2070         movaps  XMMWORD[192+rsp],xmm14
2071         movaps  XMMWORD[208+rsp],xmm15
2072 $L$xts_dec_body:
2073         mov     rbp,rsp                 ; rbp = frame base; rsp moves further down below
2074         mov     r12,rcx                 ; r12 = source pointer
2075         mov     r13,rdx                 ; r13 = destination pointer
2076         mov     r14,r8                  ; r14 = byte length
2077         mov     r15,r9                  ; r15 = data key
2078
; Initial tweak: asm_AES_encrypt(rcx = 6th arg, rdx = scratch, r8 = 5th arg).
; The 16 bytes it leaves at [rbp+32] are loaded into xmm6 further down.
2079         lea     rcx,[r11]
2080         lea     rdx,[32+rbp]
2081         lea     r8,[r10]
2082         call    asm_AES_encrypt
2083
2084         mov     eax,DWORD[240+r15]      ; eax = round count stored in the key structure
2085         mov     rbx,r14                 ; rbx keeps the unrounded length for the tail test
2086
2087         mov     edx,eax                 ; edx = rounds; reloaded into r10d before every _bsaes_decrypt8 call
2088         shl     rax,7
2089         sub     rax,96                  ; rax = rounds*128 - 96 bytes
2090         sub     rsp,rax                 ; reserve that much stack for the converted key schedule
2091
; _bsaes_key_convert is defined outside this view. It is called with
; rax = destination (rsp), rcx = key, r10d = rounds. Afterwards xmm7 is XORed
; with the first 16 bytes of the schedule and written back, and xmm6 is stored
; at the address left in rax (presumably the round-0 and last-round key
; fix-ups -- confirm against _bsaes_key_convert).
2092         mov     rax,rsp
2093         mov     rcx,r15
2094         mov     r10d,edx
2095         call    _bsaes_key_convert
2096         pxor    xmm7,XMMWORD[rsp]
2097         movdqa  XMMWORD[rax],xmm6
2098         movdqa  XMMWORD[rsp],xmm7
2099
; r14 = len rounded down to a multiple of 16; if len is not a multiple of 16,
; one more full block is held back for the ciphertext-stealing tail.
2100         xor     eax,eax
2101         and     r14,-16
2102         test    ebx,15
2103         setnz   al
2104         shl     rax,4                   ; rax = 16 if (len & 15) != 0, else 0
2105         sub     r14,rax
2106
2107         sub     rsp,0x80                ; 128 bytes at [rsp]: eight 16-byte tweak slots; key schedule is now at rsp+128
2108         movdqa  xmm6,XMMWORD[32+rbp]    ; xmm6 = current tweak (result of the asm_AES_encrypt call above)
2109
2110         pxor    xmm14,xmm14
2111         movdqa  xmm12,XMMWORD[$L$xts_magic] ; xmm12 = reduction mask {0x87,0,1,0}
2112         pcmpgtd xmm14,xmm6              ; xmm14 = per-dword sign mask of the tweak (0 > x), consumed by the first doubling step
2113
2114         sub     r14,0x80                ; at least 128 bytes (8 blocks) available?
2115         jc      NEAR $L$xts_dec_short   ; no: handle 0..7 blocks
2116         jmp     NEAR $L$xts_dec_loop
2117
; ---------------------------------------------------------------------------
; $L$xts_dec_loop -- main loop of bsaes_xts_decrypt, 8 blocks (128 bytes) per
; iteration.
;
; On entry: xmm6 = tweak for block 0, xmm14 = per-dword sign mask of xmm6,
; xmm12 = $L$xts_magic, r12/r13 = source/destination, edx = rounds.
;
; Each 8-instruction group saves the current tweak to slot i at [rsp+16*i],
; copies it into the data register for block i, and multiplies the tweak by
; x in GF(2^128):
;   pshufd 0x13 : dword0 <- sign mask of dword3, dword2 <- sign mask of dword1
;   paddq       : shift both 64-bit halves left by one bit
;   pand magic  : keep 0x87 in dword0 if bit 127 was set, 1 in dword2 if
;                 bit 63 was set (the carry between the halves)
;   pxor        : fold both corrections into the doubled tweak
; Input loads and the tweak XOR of earlier blocks are interleaved with the
; tweak computation.
; ---------------------------------------------------------------------------
2118 ALIGN   16
2119 $L$xts_dec_loop:
2120         pshufd  xmm13,xmm14,0x13
2121         pxor    xmm14,xmm14
2122         movdqa  xmm15,xmm6              ; block 0 register = tweak 0
2123         movdqa  XMMWORD[rsp],xmm6       ; slot 0
2124         paddq   xmm6,xmm6
2125         pand    xmm13,xmm12
2126         pcmpgtd xmm14,xmm6              ; sign mask for the next doubling
2127         pxor    xmm6,xmm13              ; xmm6 = tweak 1
2128         pshufd  xmm13,xmm14,0x13
2129         pxor    xmm14,xmm14
2130         movdqa  xmm0,xmm6               ; block 1 register = tweak 1
2131         movdqa  XMMWORD[16+rsp],xmm6    ; slot 1
2132         paddq   xmm6,xmm6
2133         pand    xmm13,xmm12
2134         pcmpgtd xmm14,xmm6
2135         pxor    xmm6,xmm13              ; xmm6 = tweak 2
2136         movdqu  xmm7,XMMWORD[r12]       ; input block 0
2137         pshufd  xmm13,xmm14,0x13
2138         pxor    xmm14,xmm14
2139         movdqa  xmm1,xmm6
2140         movdqa  XMMWORD[32+rsp],xmm6    ; slot 2
2141         paddq   xmm6,xmm6
2142         pand    xmm13,xmm12
2143         pcmpgtd xmm14,xmm6
2144         pxor    xmm6,xmm13              ; xmm6 = tweak 3
2145         movdqu  xmm8,XMMWORD[16+r12]    ; input block 1
2146         pxor    xmm15,xmm7              ; block 0 ^= tweak 0
2147         pshufd  xmm13,xmm14,0x13
2148         pxor    xmm14,xmm14
2149         movdqa  xmm2,xmm6
2150         movdqa  XMMWORD[48+rsp],xmm6    ; slot 3
2151         paddq   xmm6,xmm6
2152         pand    xmm13,xmm12
2153         pcmpgtd xmm14,xmm6
2154         pxor    xmm6,xmm13              ; xmm6 = tweak 4
2155         movdqu  xmm9,XMMWORD[32+r12]    ; input block 2
2156         pxor    xmm0,xmm8               ; block 1 ^= tweak 1
2157         pshufd  xmm13,xmm14,0x13
2158         pxor    xmm14,xmm14
2159         movdqa  xmm3,xmm6
2160         movdqa  XMMWORD[64+rsp],xmm6    ; slot 4
2161         paddq   xmm6,xmm6
2162         pand    xmm13,xmm12
2163         pcmpgtd xmm14,xmm6
2164         pxor    xmm6,xmm13              ; xmm6 = tweak 5
2165         movdqu  xmm10,XMMWORD[48+r12]   ; input block 3
2166         pxor    xmm1,xmm9               ; block 2 ^= tweak 2
2167         pshufd  xmm13,xmm14,0x13
2168         pxor    xmm14,xmm14
2169         movdqa  xmm4,xmm6
2170         movdqa  XMMWORD[80+rsp],xmm6    ; slot 5
2171         paddq   xmm6,xmm6
2172         pand    xmm13,xmm12
2173         pcmpgtd xmm14,xmm6
2174         pxor    xmm6,xmm13              ; xmm6 = tweak 6
2175         movdqu  xmm11,XMMWORD[64+r12]   ; input block 4
2176         pxor    xmm2,xmm10              ; block 3 ^= tweak 3
2177         pshufd  xmm13,xmm14,0x13
2178         pxor    xmm14,xmm14
2179         movdqa  xmm5,xmm6
2180         movdqa  XMMWORD[96+rsp],xmm6    ; slot 6
2181         paddq   xmm6,xmm6
2182         pand    xmm13,xmm12
2183         pcmpgtd xmm14,xmm6
2184         pxor    xmm6,xmm13              ; xmm6 = tweak 7
2185         movdqu  xmm12,XMMWORD[80+r12]   ; input block 5; overwrites the magic mask, which is reloaded after the call
2186         pxor    xmm3,xmm11              ; block 4 ^= tweak 4
2187         movdqu  xmm13,XMMWORD[96+r12]   ; input block 6
2188         pxor    xmm4,xmm12              ; block 5 ^= tweak 5
2189         movdqu  xmm14,XMMWORD[112+r12]  ; input block 7
2190         lea     r12,[128+r12]           ; advance source by 8 blocks
2191         movdqa  XMMWORD[112+rsp],xmm6   ; slot 7
2192         pxor    xmm5,xmm13              ; block 6 ^= tweak 6
2193         lea     rax,[128+rsp]           ; rax = key schedule (just above the tweak slots)
2194         pxor    xmm6,xmm14              ; block 7 ^= tweak 7
2195         mov     r10d,edx                ; r10d = rounds
2196
2197         call    _bsaes_decrypt8
2198
; Results for blocks 0..7 are taken from xmm15, xmm0, xmm5, xmm3, xmm1, xmm6,
; xmm2, xmm4 in that order; each is XORed with its saved tweak and stored.
2199         pxor    xmm15,XMMWORD[rsp]
2200         pxor    xmm0,XMMWORD[16+rsp]
2201         movdqu  XMMWORD[r13],xmm15
2202         pxor    xmm5,XMMWORD[32+rsp]
2203         movdqu  XMMWORD[16+r13],xmm0
2204         pxor    xmm3,XMMWORD[48+rsp]
2205         movdqu  XMMWORD[32+r13],xmm5
2206         pxor    xmm1,XMMWORD[64+rsp]
2207         movdqu  XMMWORD[48+r13],xmm3
2208         pxor    xmm6,XMMWORD[80+rsp]
2209         movdqu  XMMWORD[64+r13],xmm1
2210         pxor    xmm2,XMMWORD[96+rsp]
2211         movdqu  XMMWORD[80+r13],xmm6
2212         pxor    xmm4,XMMWORD[112+rsp]
2213         movdqu  XMMWORD[96+r13],xmm2
2214         movdqu  XMMWORD[112+r13],xmm4
2215         lea     r13,[128+r13]           ; advance destination by 8 blocks
2216
; Rebuild the loop-entry state: double tweak 7 to get the next block's tweak
; and leave its sign mask in xmm14.
2217         movdqa  xmm6,XMMWORD[112+rsp]
2218         pxor    xmm14,xmm14
2219         movdqa  xmm12,XMMWORD[$L$xts_magic]
2220         pcmpgtd xmm14,xmm6
2221         pshufd  xmm13,xmm14,0x13
2222         pxor    xmm14,xmm14
2223         paddq   xmm6,xmm6
2224         pand    xmm13,xmm12
2225         pcmpgtd xmm14,xmm6
2226         pxor    xmm6,xmm13
2227
2228         sub     r14,0x80
2229         jnc     NEAR $L$xts_dec_loop    ; loop while another 8 blocks remain; otherwise fall through to the short path
2230
; ---------------------------------------------------------------------------
; $L$xts_dec_short -- handles the last 0..7 whole blocks.
;
; Entered with r14 = (remaining whole-block bytes) - 128. After the add,
; r14 is 0, 16, ..., 112. Tweaks are generated exactly as in the main loop
; (see $L$xts_magic for the reduction mask). After each input load r14 is
; compared with the byte count reached so far, and control leaves for
; $L$xts_dec_N when exactly N blocks remain. The XOR of an input block into
; its tweak register is issued two steps later than its load, so every
; $L$xts_dec_N tail begins by performing the XORs still outstanding.
; Falling through all comparisons means seven blocks remain.
; ---------------------------------------------------------------------------
2231 $L$xts_dec_short:
2232         add     r14,0x80                ; undo the speculative subtraction
2233         jz      NEAR $L$xts_dec_done    ; no whole blocks left
2234         pshufd  xmm13,xmm14,0x13
2235         pxor    xmm14,xmm14
2236         movdqa  xmm15,xmm6              ; block 0 register = tweak 0
2237         movdqa  XMMWORD[rsp],xmm6       ; slot 0
2238         paddq   xmm6,xmm6
2239         pand    xmm13,xmm12
2240         pcmpgtd xmm14,xmm6
2241         pxor    xmm6,xmm13              ; xmm6 = tweak 1
2242         pshufd  xmm13,xmm14,0x13
2243         pxor    xmm14,xmm14
2244         movdqa  xmm0,xmm6
2245         movdqa  XMMWORD[16+rsp],xmm6    ; slot 1
2246         paddq   xmm6,xmm6
2247         pand    xmm13,xmm12
2248         pcmpgtd xmm14,xmm6
2249         pxor    xmm6,xmm13              ; xmm6 = tweak 2
2250         movdqu  xmm7,XMMWORD[r12]       ; input block 0
2251         cmp     r14,16
2252         je      NEAR $L$xts_dec_1
2253         pshufd  xmm13,xmm14,0x13
2254         pxor    xmm14,xmm14
2255         movdqa  xmm1,xmm6
2256         movdqa  XMMWORD[32+rsp],xmm6    ; slot 2
2257         paddq   xmm6,xmm6
2258         pand    xmm13,xmm12
2259         pcmpgtd xmm14,xmm6
2260         pxor    xmm6,xmm13              ; xmm6 = tweak 3
2261         movdqu  xmm8,XMMWORD[16+r12]    ; input block 1
2262         cmp     r14,32
2263         je      NEAR $L$xts_dec_2
2264         pxor    xmm15,xmm7              ; block 0 ^= tweak 0
2265         pshufd  xmm13,xmm14,0x13
2266         pxor    xmm14,xmm14
2267         movdqa  xmm2,xmm6
2268         movdqa  XMMWORD[48+rsp],xmm6    ; slot 3
2269         paddq   xmm6,xmm6
2270         pand    xmm13,xmm12
2271         pcmpgtd xmm14,xmm6
2272         pxor    xmm6,xmm13              ; xmm6 = tweak 4
2273         movdqu  xmm9,XMMWORD[32+r12]    ; input block 2
2274         cmp     r14,48
2275         je      NEAR $L$xts_dec_3
2276         pxor    xmm0,xmm8               ; block 1 ^= tweak 1
2277         pshufd  xmm13,xmm14,0x13
2278         pxor    xmm14,xmm14
2279         movdqa  xmm3,xmm6
2280         movdqa  XMMWORD[64+rsp],xmm6    ; slot 4
2281         paddq   xmm6,xmm6
2282         pand    xmm13,xmm12
2283         pcmpgtd xmm14,xmm6
2284         pxor    xmm6,xmm13              ; xmm6 = tweak 5
2285         movdqu  xmm10,XMMWORD[48+r12]   ; input block 3
2286         cmp     r14,64
2287         je      NEAR $L$xts_dec_4
2288         pxor    xmm1,xmm9               ; block 2 ^= tweak 2
2289         pshufd  xmm13,xmm14,0x13
2290         pxor    xmm14,xmm14
2291         movdqa  xmm4,xmm6
2292         movdqa  XMMWORD[80+rsp],xmm6    ; slot 5
2293         paddq   xmm6,xmm6
2294         pand    xmm13,xmm12
2295         pcmpgtd xmm14,xmm6
2296         pxor    xmm6,xmm13              ; xmm6 = tweak 6
2297         movdqu  xmm11,XMMWORD[64+r12]   ; input block 4
2298         cmp     r14,80
2299         je      NEAR $L$xts_dec_5
2300         pxor    xmm2,xmm10              ; block 3 ^= tweak 3
2301         pshufd  xmm13,xmm14,0x13
2302         pxor    xmm14,xmm14
2303         movdqa  xmm5,xmm6
2304         movdqa  XMMWORD[96+rsp],xmm6    ; slot 6
2305         paddq   xmm6,xmm6
2306         pand    xmm13,xmm12
2307         pcmpgtd xmm14,xmm6
2308         pxor    xmm6,xmm13              ; xmm6 = tweak 7
2309         movdqu  xmm12,XMMWORD[80+r12]   ; input block 5 (magic mask in xmm12 is no longer needed here)
2310         cmp     r14,96
2311         je      NEAR $L$xts_dec_6
; Seven blocks remain.
2312         pxor    xmm3,xmm11              ; block 4 ^= tweak 4
2313         movdqu  xmm13,XMMWORD[96+r12]   ; input block 6
2314         pxor    xmm4,xmm12              ; block 5 ^= tweak 5
2315         movdqa  XMMWORD[112+rsp],xmm6   ; slot 7 = tweak that follows these seven blocks
2316         lea     r12,[112+r12]           ; advance source by 7 blocks
2317         pxor    xmm5,xmm13              ; block 6 ^= tweak 6
2318         lea     rax,[128+rsp]           ; rax = key schedule
2319         mov     r10d,edx                ; r10d = rounds
2320
2321         call    _bsaes_decrypt8
2322
; Seven results are un-tweaked and stored; the eighth lane is not stored.
2323         pxor    xmm15,XMMWORD[rsp]
2324         pxor    xmm0,XMMWORD[16+rsp]
2325         movdqu  XMMWORD[r13],xmm15
2326         pxor    xmm5,XMMWORD[32+rsp]
2327         movdqu  XMMWORD[16+r13],xmm0
2328         pxor    xmm3,XMMWORD[48+rsp]
2329         movdqu  XMMWORD[32+r13],xmm5
2330         pxor    xmm1,XMMWORD[64+rsp]
2331         movdqu  XMMWORD[48+r13],xmm3
2332         pxor    xmm6,XMMWORD[80+rsp]
2333         movdqu  XMMWORD[64+r13],xmm1
2334         pxor    xmm2,XMMWORD[96+rsp]
2335         movdqu  XMMWORD[80+r13],xmm6
2336         movdqu  XMMWORD[96+r13],xmm2
2337         lea     r13,[112+r13]           ; advance destination by 7 blocks
2338
2339         movdqa  xmm6,XMMWORD[112+rsp]   ; xmm6 = next unused tweak, as $L$xts_dec_done expects
2340         jmp     NEAR $L$xts_dec_done
; $L$xts_dec_6 -- six whole blocks remain (reached from $L$xts_dec_short).
; Completes the two tweak XORs still outstanding (blocks 4 and 5), decrypts
; through _bsaes_decrypt8, un-tweaks and stores six results, then leaves the
; next unused tweak (slot 6) in xmm6 for $L$xts_dec_done.
2341 ALIGN   16
2342 $L$xts_dec_6:
2343         pxor    xmm3,xmm11              ; block 4 ^= tweak 4
2344         lea     r12,[96+r12]            ; advance source by 6 blocks
2345         pxor    xmm4,xmm12              ; block 5 ^= tweak 5
2346         lea     rax,[128+rsp]           ; rax = key schedule
2347         mov     r10d,edx                ; r10d = rounds
2348
2349         call    _bsaes_decrypt8
2350
2351         pxor    xmm15,XMMWORD[rsp]
2352         pxor    xmm0,XMMWORD[16+rsp]
2353         movdqu  XMMWORD[r13],xmm15
2354         pxor    xmm5,XMMWORD[32+rsp]
2355         movdqu  XMMWORD[16+r13],xmm0
2356         pxor    xmm3,XMMWORD[48+rsp]
2357         movdqu  XMMWORD[32+r13],xmm5
2358         pxor    xmm1,XMMWORD[64+rsp]
2359         movdqu  XMMWORD[48+r13],xmm3
2360         pxor    xmm6,XMMWORD[80+rsp]
2361         movdqu  XMMWORD[64+r13],xmm1
2362         movdqu  XMMWORD[80+r13],xmm6
2363         lea     r13,[96+r13]            ; advance destination by 6 blocks
2364
2365         movdqa  xmm6,XMMWORD[96+rsp]    ; next unused tweak (slot 6)
2366         jmp     NEAR $L$xts_dec_done
; $L$xts_dec_5 -- five whole blocks remain (reached from $L$xts_dec_short).
; Completes the outstanding tweak XORs for blocks 3 and 4, decrypts through
; _bsaes_decrypt8, un-tweaks and stores five results, then leaves the next
; unused tweak (slot 5) in xmm6 for $L$xts_dec_done.
2367 ALIGN   16
2368 $L$xts_dec_5:
2369         pxor    xmm2,xmm10              ; block 3 ^= tweak 3
2370         lea     r12,[80+r12]            ; advance source by 5 blocks
2371         pxor    xmm3,xmm11              ; block 4 ^= tweak 4
2372         lea     rax,[128+rsp]           ; rax = key schedule
2373         mov     r10d,edx                ; r10d = rounds
2374
2375         call    _bsaes_decrypt8
2376
2377         pxor    xmm15,XMMWORD[rsp]
2378         pxor    xmm0,XMMWORD[16+rsp]
2379         movdqu  XMMWORD[r13],xmm15
2380         pxor    xmm5,XMMWORD[32+rsp]
2381         movdqu  XMMWORD[16+r13],xmm0
2382         pxor    xmm3,XMMWORD[48+rsp]
2383         movdqu  XMMWORD[32+r13],xmm5
2384         pxor    xmm1,XMMWORD[64+rsp]
2385         movdqu  XMMWORD[48+r13],xmm3
2386         movdqu  XMMWORD[64+r13],xmm1
2387         lea     r13,[80+r13]            ; advance destination by 5 blocks
2388
2389         movdqa  xmm6,XMMWORD[80+rsp]    ; next unused tweak (slot 5)
2390         jmp     NEAR $L$xts_dec_done
; $L$xts_dec_4 -- four whole blocks remain (reached from $L$xts_dec_short).
; Completes the outstanding tweak XORs for blocks 2 and 3, decrypts through
; _bsaes_decrypt8, un-tweaks and stores four results, then leaves the next
; unused tweak (slot 4) in xmm6 for $L$xts_dec_done.
2391 ALIGN   16
2392 $L$xts_dec_4:
2393         pxor    xmm1,xmm9               ; block 2 ^= tweak 2
2394         lea     r12,[64+r12]            ; advance source by 4 blocks
2395         pxor    xmm2,xmm10              ; block 3 ^= tweak 3
2396         lea     rax,[128+rsp]           ; rax = key schedule
2397         mov     r10d,edx                ; r10d = rounds
2398
2399         call    _bsaes_decrypt8
2400
2401         pxor    xmm15,XMMWORD[rsp]
2402         pxor    xmm0,XMMWORD[16+rsp]
2403         movdqu  XMMWORD[r13],xmm15
2404         pxor    xmm5,XMMWORD[32+rsp]
2405         movdqu  XMMWORD[16+r13],xmm0
2406         pxor    xmm3,XMMWORD[48+rsp]
2407         movdqu  XMMWORD[32+r13],xmm5
2408         movdqu  XMMWORD[48+r13],xmm3
2409         lea     r13,[64+r13]            ; advance destination by 4 blocks
2410
2411         movdqa  xmm6,XMMWORD[64+rsp]    ; next unused tweak (slot 4)
2412         jmp     NEAR $L$xts_dec_done
; $L$xts_dec_3 -- three whole blocks remain (reached from $L$xts_dec_short).
; Completes the outstanding tweak XORs for blocks 1 and 2, decrypts through
; _bsaes_decrypt8, un-tweaks and stores three results, then leaves the next
; unused tweak (slot 3) in xmm6 for $L$xts_dec_done.
2413 ALIGN   16
2414 $L$xts_dec_3:
2415         pxor    xmm0,xmm8               ; block 1 ^= tweak 1
2416         lea     r12,[48+r12]            ; advance source by 3 blocks
2417         pxor    xmm1,xmm9               ; block 2 ^= tweak 2
2418         lea     rax,[128+rsp]           ; rax = key schedule
2419         mov     r10d,edx                ; r10d = rounds
2420
2421         call    _bsaes_decrypt8
2422
2423         pxor    xmm15,XMMWORD[rsp]
2424         pxor    xmm0,XMMWORD[16+rsp]
2425         movdqu  XMMWORD[r13],xmm15
2426         pxor    xmm5,XMMWORD[32+rsp]
2427         movdqu  XMMWORD[16+r13],xmm0
2428         movdqu  XMMWORD[32+r13],xmm5
2429         lea     r13,[48+r13]            ; advance destination by 3 blocks
2430
2431         movdqa  xmm6,XMMWORD[48+rsp]    ; next unused tweak (slot 3)
2432         jmp     NEAR $L$xts_dec_done
; $L$xts_dec_2 -- two whole blocks remain (reached from $L$xts_dec_short).
; Completes the outstanding tweak XORs for blocks 0 and 1, decrypts through
; _bsaes_decrypt8, un-tweaks and stores two results, then leaves the next
; unused tweak (slot 2) in xmm6 for $L$xts_dec_done.
2433 ALIGN   16
2434 $L$xts_dec_2:
2435         pxor    xmm15,xmm7              ; block 0 ^= tweak 0
2436         lea     r12,[32+r12]            ; advance source by 2 blocks
2437         pxor    xmm0,xmm8               ; block 1 ^= tweak 1
2438         lea     rax,[128+rsp]           ; rax = key schedule
2439         mov     r10d,edx                ; r10d = rounds
2440
2441         call    _bsaes_decrypt8
2442
2443         pxor    xmm15,XMMWORD[rsp]
2444         pxor    xmm0,XMMWORD[16+rsp]
2445         movdqu  XMMWORD[r13],xmm15
2446         movdqu  XMMWORD[16+r13],xmm0
2447         lea     r13,[32+r13]            ; advance destination by 2 blocks
2448
2449         movdqa  xmm6,XMMWORD[32+rsp]    ; next unused tweak (slot 2)
2450         jmp     NEAR $L$xts_dec_done
; $L$xts_dec_1 -- exactly one whole block remains (reached from
; $L$xts_dec_short). The block is decrypted with the single-block routine
; asm_AES_decrypt, using the scratch slot at [rbp+32] as both input and
; output, instead of the 8-way bit-sliced core. Falls through to
; $L$xts_dec_done.
; NOTE(review): rsp points at the tweak slots during this call, so
; [rsp..rsp+31] is also the callee's Win64 home space; the reload of slot 1
; after the call relies on asm_AES_decrypt not writing there -- confirm.
2451 ALIGN   16
2452 $L$xts_dec_1:
2453         pxor    xmm7,xmm15              ; xmm7 = input block 0 ^ tweak 0 (xmm15 keeps the tweak)
2454         lea     r12,[16+r12]            ; advance source by 1 block
2455         movdqa  XMMWORD[32+rbp],xmm7
2456         lea     rcx,[32+rbp]            ; arg 1: input  = scratch
2457         lea     rdx,[32+rbp]            ; arg 2: output = scratch (in place)
2458         lea     r8,[r15]                ; arg 3: data key
2459         call    asm_AES_decrypt
2460         pxor    xmm15,XMMWORD[32+rbp]   ; result ^ tweak 0
2461
2462
2463
2464
2465
2466         movdqu  XMMWORD[r13],xmm15
2467         lea     r13,[16+r13]            ; advance destination by 1 block
2468
2469         movdqa  xmm6,XMMWORD[16+rsp]    ; next unused tweak (slot 1)
2470
; ---------------------------------------------------------------------------
; $L$xts_dec_done / $L$xts_dec_steal -- ciphertext-stealing tail.
;
; On entry xmm6 holds the next unused tweak T, r12/r13 point just past the
; processed input/output, and ebx still holds the original length.
; If len is a multiple of 16 nothing is left to do. Otherwise, with
; n = len & 15:
;   1. xmm5 = T, xmm6 = T*x (one more GF(2^128) doubling).
;   2. The last full input block at [r12] is decrypted with T*x and written
;      to [r13].
;   3. For each of the n tail bytes, the input byte at [r12+16+i] replaces
;      byte i of that block, and the displaced byte goes to [r13+16+i]
;      (the final partial output block).
;   4. The patched block at [r13] is decrypted with T and stored back.
; Steps 2 and 4 use the tweaks in swapped order, as XTS decryption with
; ciphertext stealing requires.
; xmm5 and xmm6 are live across both asm_AES_decrypt calls, so this code
; depends on that routine leaving them intact (xmm5 is volatile under the
; Win64 convention).
; ---------------------------------------------------------------------------
2471 $L$xts_dec_done:
2472         and     ebx,15                  ; ebx = number of tail bytes
2473         jz      NEAR $L$xts_dec_ret
2474
2475         pxor    xmm14,xmm14
2476         movdqa  xmm12,XMMWORD[$L$xts_magic]
2477         pcmpgtd xmm14,xmm6              ; per-dword sign mask of T
2478         pshufd  xmm13,xmm14,0x13
2479         movdqa  xmm5,xmm6               ; xmm5 = T, used for the second decryption
2480         paddq   xmm6,xmm6
2481         pand    xmm13,xmm12
2482         movdqu  xmm15,XMMWORD[r12]      ; last full input block
2483         pxor    xmm6,xmm13              ; xmm6 = T*x
2484
2485         lea     rcx,[32+rbp]            ; arg 1: input  = scratch
2486         pxor    xmm15,xmm6
2487         lea     rdx,[32+rbp]            ; arg 2: output = scratch (in place)
2488         movdqa  XMMWORD[32+rbp],xmm15
2489         lea     r8,[r15]                ; arg 3: data key
2490         call    asm_AES_decrypt
2491         pxor    xmm6,XMMWORD[32+rbp]    ; xmm6 = result ^ T*x
2492         mov     rdx,r13                 ; rdx = byte cursor into the output block
2493         movdqu  XMMWORD[r13],xmm6
2494
2495 $L$xts_dec_steal:
2496         movzx   eax,BYTE[16+r12]        ; tail input byte
2497         movzx   ecx,BYTE[rdx]           ; byte of the block just written
2498         lea     r12,[1+r12]
2499         mov     BYTE[rdx],al            ; tail input byte goes into the block to be decrypted again
2500         mov     BYTE[16+rdx],cl         ; displaced byte becomes part of the final partial block
2501         lea     rdx,[1+rdx]
2502         sub     ebx,1
2503         jnz     NEAR $L$xts_dec_steal
2504
2505         movdqu  xmm15,XMMWORD[r13]      ; reload the patched block
2506         lea     rcx,[32+rbp]            ; arg 1: input  = scratch
2507         pxor    xmm15,xmm5
2508         lea     rdx,[32+rbp]            ; arg 2: output = scratch (in place)
2509         movdqa  XMMWORD[32+rbp],xmm15
2510         lea     r8,[r15]                ; arg 3: data key
2511         call    asm_AES_decrypt
2512         pxor    xmm5,XMMWORD[32+rbp]    ; xmm5 = result ^ T
2513         movdqu  XMMWORD[r13],xmm5
2514
; ---------------------------------------------------------------------------
; $L$xts_dec_ret -- wipe the working stack area and return.
;
; Everything between rsp and rbp (the eight tweak slots and the converted key
; schedule) is overwritten with zeros, 32 bytes per step. The callee-saved
; xmm and general-purpose registers are then reloaded from the frame laid out
; by the prologue, and rsp is restored to its value at function entry.
; ---------------------------------------------------------------------------
2515 $L$xts_dec_ret:
2516         lea     rax,[rsp]               ; rax = wipe cursor, starting at the lowest used address
2517         pxor    xmm0,xmm0
2518 $L$xts_dec_bzero:
2519         movdqa  XMMWORD[rax],xmm0
2520         movdqa  XMMWORD[16+rax],xmm0
2521         lea     rax,[32+rax]
2522         cmp     rbp,rax
2523         ja      NEAR $L$xts_dec_bzero   ; continue while the cursor is below rbp
2524
2525         lea     rsp,[rbp]               ; drop key schedule and tweak slots
2526         movaps  xmm6,XMMWORD[64+rbp]    ; restore callee-saved xmm6..xmm15
2527         movaps  xmm7,XMMWORD[80+rbp]
2528         movaps  xmm8,XMMWORD[96+rbp]
2529         movaps  xmm9,XMMWORD[112+rbp]
2530         movaps  xmm10,XMMWORD[128+rbp]
2531         movaps  xmm11,XMMWORD[144+rbp]
2532         movaps  xmm12,XMMWORD[160+rbp]
2533         movaps  xmm13,XMMWORD[176+rbp]
2534         movaps  xmm14,XMMWORD[192+rbp]
2535         movaps  xmm15,XMMWORD[208+rbp]
2536         lea     rsp,[160+rbp]           ; rsp = entry rsp - 120
2537         mov     r15,QWORD[72+rsp]       ; reload the six registers pushed by the prologue
2538         mov     r14,QWORD[80+rsp]
2539         mov     r13,QWORD[88+rsp]
2540         mov     r12,QWORD[96+rsp]
2541         mov     rbx,QWORD[104+rsp]
2542         mov     rax,QWORD[112+rsp]      ; saved rbp, staged through rax
2543         lea     rsp,[120+rsp]           ; rsp = entry rsp
2544         mov     rbp,rax
2545 $L$xts_dec_epilogue:
2546         DB      0F3h,0C3h               ;repret (bytes F3 C3 = "rep ret")
2547
2548
; ---------------------------------------------------------------------------
; _bsaes_const -- read-only 16-byte constants shared by the bsaes routines,
; 64-byte aligned. Code addresses neighbouring entries relative to $L$BS0
; (for example _bsaes_encrypt8 loads [r11+80] with r11 = $L$BS0, which is
; $L$M0SR), so the order and size of the entries must not change.
; ---------------------------------------------------------------------------
2549 ALIGN   64
2550 _bsaes_const:
; Byte-permutation masks (presumably InvShiftRows variants for the decrypt
; path -- their users are outside this view).
2551 $L$M0ISR:
2552         DQ      0x0a0e0206070b0f03,0x0004080c0d010509
2553 $L$ISRM0:
2554         DQ      0x01040b0e0205080f,0x0306090c00070a0d
2555 $L$ISR:
2556         DQ      0x0504070602010003,0x0f0e0d0c080b0a09
; Alternating-bit masks with group widths 1, 2 and 4; $L$BS0 is the base
; pointer used by the bit-slice transpose in _bsaes_encrypt8_bitslice.
2557 $L$BS0:
2558         DQ      0x5555555555555555,0x5555555555555555
2559 $L$BS1:
2560         DQ      0x3333333333333333,0x3333333333333333
2561 $L$BS2:
2562         DQ      0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
; Byte-permutation masks (presumably ShiftRows variants for the encrypt
; path). $L$M0SR is the mask _bsaes_encrypt8 applies to the input blocks.
2563 $L$SR:
2564         DQ      0x0504070600030201,0x0f0e0d0c0a09080b
2565 $L$SRM0:
2566         DQ      0x0304090e00050a0f,0x01060b0c0207080d
2567 $L$M0SR:
2568         DQ      0x0a0e02060f03070b,0x0004080c05090d01
; $L$SWPUP reverses the byte order of the top dword and leaves the rest in
; place when used as a byte-shuffle mask; $L$SWPUPM0SR is presumably that
; shuffle combined with $L$M0SR -- users are outside this view.
2569 $L$SWPUP:
2570         DQ      0x0706050403020100,0x0c0d0e0f0b0a0908
2571 $L$SWPUPM0SR:
2572         DQ      0x0a0d02060c03070b,0x0004080f05090e01
; $L$ADDn holds the value n in the top dword and zero elsewhere (presumably
; counter increments for the CTR routine, which is outside this view).
2573 $L$ADD1:
2574         DQ      0x0000000000000000,0x0000000100000000
2575 $L$ADD2:
2576         DQ      0x0000000000000000,0x0000000200000000
2577 $L$ADD3:
2578         DQ      0x0000000000000000,0x0000000300000000
2579 $L$ADD4:
2580         DQ      0x0000000000000000,0x0000000400000000
2581 $L$ADD5:
2582         DQ      0x0000000000000000,0x0000000500000000
2583 $L$ADD6:
2584         DQ      0x0000000000000000,0x0000000600000000
2585 $L$ADD7:
2586         DQ      0x0000000000000000,0x0000000700000000
2587 $L$ADD8:
2588         DQ      0x0000000000000000,0x0000000800000000
; XTS tweak-doubling mask: 0x87 in dword 0 is the GF(2^128) reduction value
; applied when bit 127 shifts out; 1 in dword 2 carries bit 63 into bit 64.
2589 $L$xts_magic:
2590         DD      0x87,0,1,0
; Single-bit byte masks for bits 0..3 (users are outside this view).
2591 $L$masks:
2592         DQ      0x0101010101010101,0x0101010101010101
2593         DQ      0x0202020202020202,0x0202020202020202
2594         DQ      0x0404040404040404,0x0404040404040404
2595         DQ      0x0808080808080808,0x0808080808080808
; Another byte-permutation mask and the byte 0x63 replicated 16 times
; (users are outside this view).
2596 $L$M0:
2597         DQ      0x02060a0e03070b0f,0x0004080c0105090d
2598 $L$63:
2599         DQ      0x6363636363636363,0x6363636363636363
; NUL-terminated UTF-8 identification string:
; "Bit-sliced AES for x86_64/SSSE3, Emilia K<a-umlaut>sper, Peter Schwabe,
; Andy Polyakov" (bytes 195,164 encode the a-umlaut).
2600 DB      66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
2601 DB      111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
2602 DB      32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
2603 DB      32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
2604 DB      65,110,100,121,32,80,111,108,121,97,107,111,118,0
2605 ALIGN   64
2606
2607 EXTERN  __imp_RtlVirtualUnwind
2608
; ---------------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced by every
; unwind-info record in .xdata. It reconstructs the caller's register state
; for a fault inside one of the bsaes routines, then delegates to
; RtlVirtualUnwind.
;
; In:  r8 = CONTEXT record of the faulting thread
;      r9 = DISPATCHER_CONTEXT; its HandlerData points at two image-relative
;           offsets (body label, epilogue label) taken from .xdata
; Out: eax = 1 (ExceptionContinueSearch)
;
; Cases, by faulting Rip:
;   Rip <  body label      registers not fully saved yet; the routine copied
;                          its entry rsp into rax, so context->Rax is used
;                          as the caller's Rsp
;   Rip >= epilogue label  frame already released; context->Rsp is kept
;   otherwise              context->Rbp is the frame base: xmm6-xmm15 and the
;                          six pushed registers are copied from the frame
;                          into the CONTEXT and Rsp is set to its entry value
; The numeric displacements are field offsets in the Win64 CONTEXT and
; DISPATCHER_CONTEXT structures.
; ---------------------------------------------------------------------------
2609 ALIGN   16
2610 se_handler:
2611         push    rsi
2612         push    rdi
2613         push    rbx
2614         push    rbp
2615         push    r12
2616         push    r13
2617         push    r14
2618         push    r15
2619         pushfq
2620         sub     rsp,64                  ; 32 bytes home space + four stack arguments for RtlVirtualUnwind
2621
2622         mov     rax,QWORD[120+r8]       ; context->Rax
2623         mov     rbx,QWORD[248+r8]       ; context->Rip
2624
2625         mov     rsi,QWORD[8+r9]         ; disp->ImageBase
2626         mov     r11,QWORD[56+r9]        ; disp->HandlerData
2627
2628         mov     r10d,DWORD[r11]         ; HandlerData[0]: body label offset
2629         lea     r10,[r10*1+rsi]         ; absolute address of the body label
2630         cmp     rbx,r10
2631         jb      NEAR $L$in_prologue     ; Rip < body label: rax = context->Rax becomes Rsp
2632
2633         mov     rax,QWORD[152+r8]       ; context->Rsp
2634
2635         mov     r10d,DWORD[4+r11]       ; HandlerData[1]: epilogue label offset
2636         lea     r10,[r10*1+rsi]         ; absolute address of the epilogue label
2637         cmp     rbx,r10
2638         jae     NEAR $L$in_prologue     ; Rip >= epilogue label: keep context->Rsp
2639
2640         mov     rax,QWORD[160+r8]       ; context->Rbp = frame base of the faulting routine
2641
2642         lea     rsi,[64+rax]            ; source: xmm save area at frame+64
2643         lea     rdi,[512+r8]            ; destination: context->Xmm6
2644         mov     ecx,20                  ; 10 registers * 16 bytes / 8
2645         DD      0xa548f3fc              ; bytes FC F3 48 A5 = cld; rep movsq
2646         lea     rax,[160+rax]           ; same adjustment as the routine's epilogue
2647
2648         mov     rbp,QWORD[112+rax]      ; saved registers, at the offsets the epilogue uses
2649         mov     rbx,QWORD[104+rax]
2650         mov     r12,QWORD[96+rax]
2651         mov     r13,QWORD[88+rax]
2652         mov     r14,QWORD[80+rax]
2653         mov     r15,QWORD[72+rax]
2654         lea     rax,[120+rax]           ; rax = rsp at entry to the faulting routine
2655         mov     QWORD[144+r8],rbx       ; context->Rbx
2656         mov     QWORD[160+r8],rbp       ; context->Rbp
2657         mov     QWORD[216+r8],r12       ; context->R12
2658         mov     QWORD[224+r8],r13       ; context->R13
2659         mov     QWORD[232+r8],r14       ; context->R14
2660         mov     QWORD[240+r8],r15       ; context->R15
2661
2662 $L$in_prologue:
2663         mov     QWORD[152+r8],rax       ; context->Rsp = value selected above
2664
2665         mov     rdi,QWORD[40+r9]        ; disp->ContextRecord
2666         mov     rsi,r8                  ; source: patched context
2667         mov     ecx,154                 ; 154 qwords = 1232 bytes, sizeof(CONTEXT)
2668         DD      0xa548f3fc              ; cld; rep movsq
2669
2670         mov     rsi,r9
2671         xor     rcx,rcx                 ; arg 1: handler type 0 (UNW_FLAG_NHANDLER)
2672         mov     rdx,QWORD[8+rsi]        ; arg 2: disp->ImageBase
2673         mov     r8,QWORD[rsi]           ; arg 3: disp->ControlPc
2674         mov     r9,QWORD[16+rsi]        ; arg 4: disp->FunctionEntry
2675         mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
2676         lea     r11,[56+rsi]            ; &disp->HandlerData
2677         lea     r12,[24+rsi]            ; &disp->EstablisherFrame
2678         mov     QWORD[32+rsp],r10       ; arg 5
2679         mov     QWORD[40+rsp],r11       ; arg 6
2680         mov     QWORD[48+rsp],r12       ; arg 7
2681         mov     QWORD[56+rsp],rcx       ; arg 8 = NULL
2682         call    QWORD[__imp_RtlVirtualUnwind]
2683
2684         mov     eax,1                   ; ExceptionContinueSearch
2685         add     rsp,64
2686         popfq
2687         pop     r15
2688         pop     r14
2689         pop     r13
2690         pop     r12
2691         pop     rbp
2692         pop     rbx
2693         pop     rdi
2694         pop     rsi
2695         DB      0F3h,0C3h               ;repret (bytes F3 C3 = "rep ret")
2696
2697
; .pdata -- Win64 function table. One three-dword entry per routine, all
; image-relative: start of the covered range, end of the covered range, and
; the address of the matching unwind-info record in .xdata.
2698 section .pdata rdata align=4
2699 ALIGN   4
2700         DD      $L$cbc_dec_prologue wrt ..imagebase
2701         DD      $L$cbc_dec_epilogue wrt ..imagebase
2702         DD      $L$cbc_dec_info wrt ..imagebase
2703
2704         DD      $L$ctr_enc_prologue wrt ..imagebase
2705         DD      $L$ctr_enc_epilogue wrt ..imagebase
2706         DD      $L$ctr_enc_info wrt ..imagebase
2707
2708         DD      $L$xts_enc_prologue wrt ..imagebase
2709         DD      $L$xts_enc_epilogue wrt ..imagebase
2710         DD      $L$xts_enc_info wrt ..imagebase
2711
2712         DD      $L$xts_dec_prologue wrt ..imagebase
2713         DD      $L$xts_dec_epilogue wrt ..imagebase
2714         DD      $L$xts_dec_info wrt ..imagebase
2715
; .xdata -- unwind-info records referenced from .pdata. Every record has the
; same shape:
;   DB 9,0,0,0   header: version 1 with the exception-handler flag set
;                (9 = 1 | 1<<3), prologue size 0, no unwind codes, no frame
;                register
;   DD handler   image-relative address of se_handler
;   DD body,epi  handler data: the two labels se_handler reads as
;                HandlerData[0] and HandlerData[1] to classify the faulting Rip
2716 section .xdata rdata align=8
2717 ALIGN   8
2718 $L$cbc_dec_info:
2719 DB      9,0,0,0
2720         DD      se_handler wrt ..imagebase
2721         DD      $L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase
2722 $L$ctr_enc_info:
2723 DB      9,0,0,0
2724         DD      se_handler wrt ..imagebase
2725         DD      $L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase
2726 $L$xts_enc_info:
2727 DB      9,0,0,0
2728         DD      se_handler wrt ..imagebase
2729         DD      $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
2730 $L$xts_dec_info:
2731 DB      9,0,0,0
2732         DD      se_handler wrt ..imagebase
2733         DD      $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase