RISCV: configs: tizen_visionfive2: Enable DM_BOW feature
[platform/kernel/linux-starfive.git] / arch / powerpc / crypto / aesp10-ppc.pl
1 #! /usr/bin/env perl
2 # SPDX-License-Identifier: GPL-2.0
3
4 # This code is taken from CRYPTOGAMs[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
8 # Foundation.
9 #
10 # [1] https://www.openssl.org/~appro/cryptogams/
11
12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13 # All rights reserved.
14 #
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
17 # are met:
18 #
19 #       * Redistributions of source code must retain copyright notices,
20 #         this list of conditions and the following disclaimer.
21 #
22 #       * Redistributions in binary form must reproduce the above
23 #         copyright notice, this list of conditions and the following
24 #         disclaimer in the documentation and/or other materials
25 #         provided with the distribution.
26 #
27 #       * Neither the name of the CRYPTOGAMS nor the names of its
28 #         copyright holder and contributors may be used to endorse or
29 #         promote products derived from this software without specific
30 #         prior written permission.
31 #
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
35 # those given above.
36 #
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48
49 # ====================================================================
50 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see https://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
55 #
56 # This module implements support for AES instructions as per PowerISA
57 # specification version 2.07, first implemented by POWER8 processor.
58 # The module is endian-agnostic in sense that it supports both big-
59 # and little-endian cases. Data alignment in parallelizable modes is
60 # handled with VSX loads and stores, which implies MSR.VSX flag being
61 # set. It should also be noted that ISA specification doesn't prohibit
62 # alignment exceptions for these instructions on page boundaries.
63 # Initially alignment was handled in pure AltiVec/VMX way [when data
64 # is aligned programmatically, which in turn guarantees exception-
65 # free execution], but it turned to hamper performance when vcipher
66 # instructions are interleaved. It's reckoned that eventual
67 # misalignment penalties at page boundaries are in average lower
68 # than additional overhead in pure AltiVec approach.
69 #
70 # May 2016
71 #
72 # Add XTS subroutine, 9x on little- and 12x improvement on big-endian
73 # systems were measured.
74 #
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with
77 # 128-bit key (less is better).
78 #
79 #               CBC en-/decrypt CTR     XTS
80 # POWER8[le]    3.96/0.72       0.74    1.1
81 # POWER8[be]    3.75/0.65       0.66    1.0
82
# First command-line argument (e.g. "linux64le", "linux32") selects the
# register width, ABI stack conventions and endianness of the generated
# assembly.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;	# link-register save slot offset (64-bit ABI)
	$STU	="stdu";	# store-with-update, doubleword
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";	# unsigned doubleword compare
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate the ppc-xlate.pl perlasm translator either next to this script
# or in the OpenSSL perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe all of our output through the translator.  Low-precedence "or" is
# required here: with "||" the check binds to the concatenated argument
# string (always true), so a failed open would go completely unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p10";	# symbol prefix: aes_p10_set_encrypt_key, etc.

$sp="r1";
$vrsave="r12";
#########################################################################
{{{	# Key setup procedures						#
# GPR names for the key-schedule entry points.  r3..r5 carry the
# (inp, bits, out) C arguments; r3 doubles as the integer return value
# (0 on success, -1/-2 on bad pointer/bits, as set via $ptr below).
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector working set: v0..v6 are scratch for the schedule expansion,
# v7..v11 implement the unaligned-output store pipeline (vperm/vsel).
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

# Assembly template.  Mnemonics prefixed with "?" and the "?rev"/"?asis"
# tags on the constants table are endian-specific; they are rewritten by
# the post-processing loop at the bottom of this file.  Lines prefixed
# "le?"/"be?" are kept or commented out per target endianness.
# No comments may be added inside the heredoc: its text is the emitted
# $code string, not Perl source.
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	 vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	 vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	 vperm		$outtail,$stage,$stage,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	 vperm		$outtail,$in1,$in1,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	 stvx		$stage,0,$out
	 addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	 vadduwm	$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	 vperm		$outtail,$in0,$in0,$outperm	# rotate
	 vsel		$stage,$outhead,$outtail,$outmask
	 vmr		$outhead,$outtail
	 stvx		$stage,0,$out
	 addi		$inp,$out,15		# 15 is not typo
	 addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
# gen_block($dir) appends the .${prefix}_${dir}crypt routine (one
# 16-byte block, vcipher/vncipher based) to the global $code template.
# $dir is "en" or "de"; for decryption the "n" infix turns vcipher into
# vncipher.  No prototype: the old empty "()" prototype was only being
# bypassed via "&"-style calls, so it served no purpose.
sub gen_block {
my $dir = shift;			# "en" or "de"
my $n   = $dir eq "de" ? "n" : "";	# mnemonic infix selecting vcipher/vncipher
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
# Emit both directions.  Plain calls (no "&" sigil) — "&foo(...)" only
# existed to bypass the now-removed prototype.
gen_block("en");
gen_block("de");
}}}
532
# Post-process the accumulated $code template line by line and print the
# final assembly to the xlate pipe opened above.
my $consts=1;			# true while still inside the leading constants table
foreach (split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;	# expand `...` compile-time expressions

	# Constants-table endian conversion: ".long"/".byte" directives
	# tagged "?rev"/"?asis"/"?inv" are re-emitted as raw ".byte" data
	# matching the target endianness.
	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
	    my $conv=$3;
	    my @bytes=();

	    # convert to endian-agnostic format
	    if ($1 eq "long") {
	      foreach (split(/,\s*/,$2)) {
		my $l = /^0/?oct:int;
		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
	      }
	    } else {
		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
	    }

	    # little-endian conversion
	    if ($flavour =~ /le$/o) {
		SWITCH: for($conv)  {
		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; };
		}
	    }

	    # emit
	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
	    next;
	}
	$consts=0 if (m/Lconsts:/o);	# end of table

	# instructions prefixed with '?' are endian-specific and need
	# to be adjusted accordingly...
	if ($flavour =~ /le$/o) {	# little-endian
	    s/le\?//o		or
	    s/be\?/#be#/o	or
	    s/\?lvsr/lvsl/o	or
	    s/\?lvsl/lvsr/o	or
	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
	} else {			# big-endian
	    s/le\?/#le#/o	or
	    s/be\?//o		or
	    s/\?([a-z]+)/$1/o;
	}

	print $_,"\n";
}

# STDOUT is a pipe to the xlate translator; write errors surface at
# close, so an unchecked close could silently truncate the output.
close STDOUT or die "error closing STDOUT: $!";