added dgemm-, dtrmm-, zgemm- and ztrmm-kernel for power8
[platform/upstream/openblas.git] / kernel / power / zgemm_logic_8x2_power8.S
/*
 * Two-column section of the ZGEMM 8x2 kernel logic.
 * J = N >> 1 is the number of passes through ZGEMM_L2_BEGIN; the record
 * form (srawi.) sets CR0, so the ble skips the whole section when J <= 0.
 * Register names (J, N, CO, C, ...) are aliases defined outside this chunk.
 */
1         srawi.          J,      N,      1
2         ble             ZGEMM_L2_END
3
4 ZGEMM_L2_BEGIN:
5
/*
 * Per-pass setup: CO and AO start from C and A, then C is advanced by
 * LDC << 1 for the next pass (presumably LDC is the byte stride of one
 * column of C, making this a two-column step -- confirm in the caller).
 * NOTE(review): slwi is a 32-bit shift, so LDC << 1 must fit in 32 bits.
 */
6         mr              CO,     C
7         mr              AO,     A
8         slwi            T1,     LDC     ,       1
9         add             C,      C,      T1
/* I = M >> 3 counts the 8-wide tiles; none -> go to the remainder tiles. */
10         srawi.          I,      M,      3
11         ble             ZGEMM_L2x8_END
12
/*
 * 8x2 tile loop, executed I times (see the addic./bgt after SAVE2x8).
 * For each tile BO is reset to B and L = K >> 3 selects the path:
 *   L <= 0 -> ZGEMM_L2x8_SUB0   (fewer than 8 K-steps)
 *   L == 1 -> ZGEMM_L2x8_SUB4   (exactly one group of 8)
 *   L >= 2 -> unrolled LOOP_START / LOOP / LOOP_END sequence
 * The LOAD/KERNEL/SAVE macros are defined outside this chunk.
 */
13 ZGEMM_L2x8_BEGIN:
14
15
16         mr              BO,     B
17         srawi.          L,      K,      3
18         ble             ZGEMM_L2x8_SUB0
19         cmpwi           cr0,    L,      1
20         ble             ZGEMM_L2x8_SUB4
21
/*
 * First group of 8 kernel macros (LOAD2x8_1 + KERNEL2x8_I1, then
 * alternating _2/_1). Each is preceded by dcbt AO, PRE, a cache-touch
 * hint for the block at AO + PRE. Presumably the _1/_2 variants alternate
 * between two register sets in a software pipeline -- confirm in the
 * macro file.
 */
22 ZGEMM_L2x8_LOOP_START:
23
24         dcbt            AO,     PRE
25         LOAD2x8_1
26         dcbt            AO,     PRE
27         KERNEL2x8_I1
28         dcbt            AO,     PRE
29         KERNEL2x8_2
30         dcbt            AO,     PRE
31         KERNEL2x8_1
32         dcbt            AO,     PRE
33         KERNEL2x8_2
34
35         dcbt            AO,     PRE
36         KERNEL2x8_1
37         dcbt            AO,     PRE
38         KERNEL2x8_2
39         dcbt            AO,     PRE
40         KERNEL2x8_1
41         dcbt            AO,     PRE
42         KERNEL2x8_2
43
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
44         addic.          L,      L,      -2
45         ble             ZGEMM_L2x8_LOOP_END
46
47         .align 5
48
/* Steady state: 8 kernel macros per pass, repeated while --L > 0. */
49 ZGEMM_L2x8_LOOP:
50
51         dcbt            AO,     PRE
52         KERNEL2x8_1
53         dcbt            AO,     PRE
54         KERNEL2x8_2
55         dcbt            AO,     PRE
56         KERNEL2x8_1
57         dcbt            AO,     PRE
58         KERNEL2x8_2
59
60         dcbt            AO,     PRE
61         KERNEL2x8_1
62         dcbt            AO,     PRE
63         KERNEL2x8_2
64         dcbt            AO,     PRE
65         KERNEL2x8_1
66         dcbt            AO,     PRE
67         KERNEL2x8_2
68
69         addic.          L,      L,      -1
70         bgt             ZGEMM_L2x8_LOOP
71
/*
 * Last group of 8: ends with KERNEL2x8_E2 instead of KERNEL2x8_2
 * (presumably the pipeline drain that issues no further loads -- confirm).
 */
72 ZGEMM_L2x8_LOOP_END:
73
74         dcbt            AO,     PRE
75         KERNEL2x8_1
76         dcbt            AO,     PRE
77         KERNEL2x8_2
78         dcbt            AO,     PRE
79         KERNEL2x8_1
80         dcbt            AO,     PRE
81         KERNEL2x8_2
82
83         dcbt            AO,     PRE
84         KERNEL2x8_1
85         dcbt            AO,     PRE
86         KERNEL2x8_2
87         dcbt            AO,     PRE
88         KERNEL2x8_1
89         KERNEL2x8_E2
90
91         b               ZGEMM_L2x8_SUB1
92
/*
 * K >> 3 == 1: eight non-pipelined invocations (SUBI1 followed by seven
 * SUB1); only the first four are preceded by a dcbt hint.
 */
93 ZGEMM_L2x8_SUB4:
94
95         dcbt            AO,     PRE
96         KERNEL2x8_SUBI1
97         dcbt            AO,     PRE
98         KERNEL2x8_SUB1
99         dcbt            AO,     PRE
100         KERNEL2x8_SUB1
101         dcbt            AO,     PRE
102         KERNEL2x8_SUB1
103
104         KERNEL2x8_SUB1
105         KERNEL2x8_SUB1
106         KERNEL2x8_SUB1
107         KERNEL2x8_SUB1
108
109         b               ZGEMM_L2x8_SUB1
110
/*
 * K >> 3 <= 0: L = K & 7, then KERNEL2x8_SUBI1 runs once before L is
 * tested; the remaining L - 1 steps go through ZGEMM_L2x8_SUB2.
 * NOTE(review): with K & 7 == 0 on this path (K == 0) one kernel step is
 * still executed; presumably callers never pass K == 0 -- confirm.
 */
111 ZGEMM_L2x8_SUB0:
112
113         andi.           L,      K,      7
114
115         KERNEL2x8_SUBI1
116
117         addic.          L,      L,      -1
118         ble             ZGEMM_L2x8_SAVE
119         b               ZGEMM_L2x8_SUB2
120
/*
 * Remainder after the groups of 8: L = K & 7. The andi. result is never
 * negative, so the ble is taken exactly when the remainder is zero.
 */
121 ZGEMM_L2x8_SUB1:
122
123         andi.           L,      K,      7
124         ble             ZGEMM_L2x8_SAVE
125
/* One KERNEL2x8_SUB1 per remaining count. */
126 ZGEMM_L2x8_SUB2:
127
128         KERNEL2x8_SUB1
129
130         addic.          L,      L,      -1
131         bgt             ZGEMM_L2x8_SUB2
132
/*
 * SAVE2x8 is defined elsewhere (presumably applies alpha, updates C at CO
 * and advances CO -- confirm). Then loop to the next tile while --I > 0.
 */
133 ZGEMM_L2x8_SAVE:
134
135         SAVE2x8
136
137         addic.          I,      I,      -1
138         bgt             ZGEMM_L2x8_BEGIN
139
140 ZGEMM_L2x8_END:
141
/*
 * 4x2 remainder tile, executed at most once per column pair.
 * If M & 7 == 0 there are no leftover rows and all remainder tiles are
 * skipped (branch to ZGEMM_L2x1_END); otherwise this tile runs only when
 * bit 2 of M is set. AO and CO are not reloaded here, so they continue
 * from wherever the preceding code left them.
 */
142 ZGEMM_L2x4_BEGIN:
143
144         andi.           T2,     M,      7
145         ble             ZGEMM_L2x1_END
146
147         andi.           T1,     M,      4
148         ble             ZGEMM_L2x4_END
/* Same K dispatch as the other tiles: L = K >> 3; <= 0 -> SUB0, == 1 -> SUB4. */
149         mr              BO,     B
150         srawi.          L,      K,      3
151         ble             ZGEMM_L2x4_SUB0
152         cmpwi           cr0,    L,      1
153         ble             ZGEMM_L2x4_SUB4
154
/* First group of 8 kernel macros: LOAD2x4_1 + KERNEL2x4_I1, then _2/_1 pairs. */
155 ZGEMM_L2x4_LOOP_START:
156
157         LOAD2x4_1
158         KERNEL2x4_I1
159         KERNEL2x4_2
160         KERNEL2x4_1
161         KERNEL2x4_2
162
163         KERNEL2x4_1
164         KERNEL2x4_2
165         KERNEL2x4_1
166         KERNEL2x4_2
167
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
168         addic.          L,      L,      -2
169         ble             ZGEMM_L2x4_LOOP_END
170
171         .align 5
172
/* Steady state: 8 kernel macros per pass while --L > 0. */
173 ZGEMM_L2x4_LOOP:
174
175         KERNEL2x4_1
176         KERNEL2x4_2
177         KERNEL2x4_1
178         KERNEL2x4_2
179
180         KERNEL2x4_1
181         KERNEL2x4_2
182         KERNEL2x4_1
183         KERNEL2x4_2
184
185         addic.          L,      L,      -1
186         bgt             ZGEMM_L2x4_LOOP
187
/* Last group of 8; ends with KERNEL2x4_E2 (presumably the pipeline drain -- confirm). */
188 ZGEMM_L2x4_LOOP_END:
189
190         KERNEL2x4_1
191         KERNEL2x4_2
192         KERNEL2x4_1
193         KERNEL2x4_2
194
195         KERNEL2x4_1
196         KERNEL2x4_2
197         KERNEL2x4_1
198         KERNEL2x4_E2
199
200         b               ZGEMM_L2x4_SUB1
201
/* K >> 3 == 1: eight non-pipelined invocations (SUBI1 + seven SUB1). */
202 ZGEMM_L2x4_SUB4:
203
204         KERNEL2x4_SUBI1
205         KERNEL2x4_SUB1
206         KERNEL2x4_SUB1
207         KERNEL2x4_SUB1
208
209         KERNEL2x4_SUB1
210         KERNEL2x4_SUB1
211         KERNEL2x4_SUB1
212         KERNEL2x4_SUB1
213
214         b               ZGEMM_L2x4_SUB1
215
/*
 * K >> 3 <= 0: L = K & 7; KERNEL2x4_SUBI1 runs once before L is tested,
 * the remaining L - 1 steps go through ZGEMM_L2x4_SUB2.
 * NOTE(review): K == 0 would still execute one kernel step here; presumably
 * callers never pass K == 0 -- confirm.
 */
216 ZGEMM_L2x4_SUB0:
217
218         andi.           L,      K,      7
219
220         KERNEL2x4_SUBI1
221
222         addic.          L,      L,      -1
223         ble             ZGEMM_L2x4_SAVE
224         b               ZGEMM_L2x4_SUB2
225
/* Remainder after the groups of 8: L = K & 7; zero -> straight to SAVE. */
226 ZGEMM_L2x4_SUB1:
227
228         andi.           L,      K,      7
229         ble             ZGEMM_L2x4_SAVE
230
/* One KERNEL2x4_SUB1 per remaining count. */
231 ZGEMM_L2x4_SUB2:
232
233         KERNEL2x4_SUB1
234
235         addic.          L,      L,      -1
236         bgt             ZGEMM_L2x4_SUB2
237
/* SAVE2x4 is defined elsewhere (presumably stores the tile to C at CO -- confirm). */
238 ZGEMM_L2x4_SAVE:
239
240         SAVE2x4
241
242 ZGEMM_L2x4_END:
243
/*
 * 2x2 remainder tile, executed at most once per column pair: runs only
 * when bit 1 of M is set. AO and CO are not reloaded here, so they
 * continue from wherever the preceding code left them.
 */
244 ZGEMM_L2x2_BEGIN:
245
246
247         andi.           T1,     M,      2
248         ble             ZGEMM_L2x2_END
/* Same K dispatch as the other tiles: L = K >> 3; <= 0 -> SUB0, == 1 -> SUB4. */
249         mr              BO,     B
250         srawi.          L,      K,      3
251         ble             ZGEMM_L2x2_SUB0
252         cmpwi           cr0,    L,      1
253         ble             ZGEMM_L2x2_SUB4
254
/* First group of 8 kernel macros: LOAD2x2_1 + KERNEL2x2_I1, then _2/_1 pairs. */
255 ZGEMM_L2x2_LOOP_START:
256
257         LOAD2x2_1
258         KERNEL2x2_I1
259         KERNEL2x2_2
260         KERNEL2x2_1
261         KERNEL2x2_2
262
263         KERNEL2x2_1
264         KERNEL2x2_2
265         KERNEL2x2_1
266         KERNEL2x2_2
267
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
268         addic.          L,      L,      -2
269         ble             ZGEMM_L2x2_LOOP_END
270
271         .align 5
272
/* Steady state: 8 kernel macros per pass while --L > 0. */
273 ZGEMM_L2x2_LOOP:
274
275         KERNEL2x2_1
276         KERNEL2x2_2
277         KERNEL2x2_1
278         KERNEL2x2_2
279
280         KERNEL2x2_1
281         KERNEL2x2_2
282         KERNEL2x2_1
283         KERNEL2x2_2
284
285         addic.          L,      L,      -1
286         bgt             ZGEMM_L2x2_LOOP
287
/* Last group of 8; ends with KERNEL2x2_E2 (presumably the pipeline drain -- confirm). */
288 ZGEMM_L2x2_LOOP_END:
289
290         KERNEL2x2_1
291         KERNEL2x2_2
292         KERNEL2x2_1
293         KERNEL2x2_2
294
295         KERNEL2x2_1
296         KERNEL2x2_2
297         KERNEL2x2_1
298         KERNEL2x2_E2
299
300         b               ZGEMM_L2x2_SUB1
301
/* K >> 3 == 1: eight non-pipelined invocations (SUBI1 + seven SUB1). */
302 ZGEMM_L2x2_SUB4:
303
304         KERNEL2x2_SUBI1
305         KERNEL2x2_SUB1
306         KERNEL2x2_SUB1
307         KERNEL2x2_SUB1
308
309         KERNEL2x2_SUB1
310         KERNEL2x2_SUB1
311         KERNEL2x2_SUB1
312         KERNEL2x2_SUB1
313
314         b               ZGEMM_L2x2_SUB1
315
/*
 * K >> 3 <= 0: L = K & 7; KERNEL2x2_SUBI1 runs once before L is tested,
 * the remaining L - 1 steps go through ZGEMM_L2x2_SUB2.
 * NOTE(review): K == 0 would still execute one kernel step here; presumably
 * callers never pass K == 0 -- confirm.
 */
316 ZGEMM_L2x2_SUB0:
317
318         andi.           L,      K,      7
319
320         KERNEL2x2_SUBI1
321
322         addic.          L,      L,      -1
323         ble             ZGEMM_L2x2_SAVE
324         b               ZGEMM_L2x2_SUB2
325
/* Remainder after the groups of 8: L = K & 7; zero -> straight to SAVE. */
326 ZGEMM_L2x2_SUB1:
327
328         andi.           L,      K,      7
329         ble             ZGEMM_L2x2_SAVE
330
/* One KERNEL2x2_SUB1 per remaining count. */
331 ZGEMM_L2x2_SUB2:
332
333         KERNEL2x2_SUB1
334
335         addic.          L,      L,      -1
336         bgt             ZGEMM_L2x2_SUB2
337
/* SAVE2x2 is defined elsewhere (presumably stores the tile to C at CO -- confirm). */
338 ZGEMM_L2x2_SAVE:
339
340         SAVE2x2
341
342 ZGEMM_L2x2_END:
343
/*
 * 1x2 remainder tile, executed at most once per column pair: runs only
 * when bit 0 of M is set. AO and CO are not reloaded here, so they
 * continue from wherever the preceding code left them.
 */
344 ZGEMM_L2x1_BEGIN:
345
346
347         andi.           T1,     M,      1
348         ble             ZGEMM_L2x1_END
/* Same K dispatch as the other tiles: L = K >> 3; <= 0 -> SUB0, == 1 -> SUB4. */
349         mr              BO,     B
350         srawi.          L,      K,      3
351         ble             ZGEMM_L2x1_SUB0
352         cmpwi           cr0,    L,      1
353         ble             ZGEMM_L2x1_SUB4
354
/* First group of 8 kernel macros: LOAD2x1_1 + KERNEL2x1_I1, then _2/_1 pairs. */
355 ZGEMM_L2x1_LOOP_START:
356
357         LOAD2x1_1
358         KERNEL2x1_I1
359         KERNEL2x1_2
360         KERNEL2x1_1
361         KERNEL2x1_2
362
363         KERNEL2x1_1
364         KERNEL2x1_2
365         KERNEL2x1_1
366         KERNEL2x1_2
367
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
368         addic.          L,      L,      -2
369         ble             ZGEMM_L2x1_LOOP_END
370
371         .align 5
372
/* Steady state: 8 kernel macros per pass while --L > 0. */
373 ZGEMM_L2x1_LOOP:
374
375         KERNEL2x1_1
376         KERNEL2x1_2
377         KERNEL2x1_1
378         KERNEL2x1_2
379
380         KERNEL2x1_1
381         KERNEL2x1_2
382         KERNEL2x1_1
383         KERNEL2x1_2
384
385         addic.          L,      L,      -1
386         bgt             ZGEMM_L2x1_LOOP
387
/* Last group of 8; ends with KERNEL2x1_E2 (presumably the pipeline drain -- confirm). */
388 ZGEMM_L2x1_LOOP_END:
389
390         KERNEL2x1_1
391         KERNEL2x1_2
392         KERNEL2x1_1
393         KERNEL2x1_2
394
395         KERNEL2x1_1
396         KERNEL2x1_2
397         KERNEL2x1_1
398         KERNEL2x1_E2
399
400         b               ZGEMM_L2x1_SUB1
401
/* K >> 3 == 1: eight non-pipelined invocations (SUBI1 + seven SUB1). */
402 ZGEMM_L2x1_SUB4:
403
404         KERNEL2x1_SUBI1
405         KERNEL2x1_SUB1
406         KERNEL2x1_SUB1
407         KERNEL2x1_SUB1
408
409         KERNEL2x1_SUB1
410         KERNEL2x1_SUB1
411         KERNEL2x1_SUB1
412         KERNEL2x1_SUB1
413
414         b               ZGEMM_L2x1_SUB1
415
/*
 * K >> 3 <= 0: L = K & 7; KERNEL2x1_SUBI1 runs once before L is tested,
 * the remaining L - 1 steps go through ZGEMM_L2x1_SUB2.
 * NOTE(review): K == 0 would still execute one kernel step here; presumably
 * callers never pass K == 0 -- confirm.
 */
416 ZGEMM_L2x1_SUB0:
417
418         andi.           L,      K,      7
419
420         KERNEL2x1_SUBI1
421
422         addic.          L,      L,      -1
423         ble             ZGEMM_L2x1_SAVE
424         b               ZGEMM_L2x1_SUB2
425
/* Remainder after the groups of 8: L = K & 7; zero -> straight to SAVE. */
426 ZGEMM_L2x1_SUB1:
427
428         andi.           L,      K,      7
429         ble             ZGEMM_L2x1_SAVE
430
/* One KERNEL2x1_SUB1 per remaining count. */
431 ZGEMM_L2x1_SUB2:
432
433         KERNEL2x1_SUB1
434
435         addic.          L,      L,      -1
436         bgt             ZGEMM_L2x1_SUB2
437
/* SAVE2x1 is defined elsewhere (presumably stores the tile to C at CO -- confirm). */
438 ZGEMM_L2x1_SAVE:
439
440         SAVE2x1
441
/*
 * Tail of the column-pair loop. B is advanced by K << 5 = 32 * K bytes
 * (presumably two columns of K complex doubles at 16 bytes each --
 * confirm against the packing routine), then the loop repeats while
 * --J > 0.
 * NOTE(review): slwi is a 32-bit shift, so K << 5 must fit in 32 bits.
 */
442 ZGEMM_L2x1_END:
443
444         slwi            T1,     K,      5
445         add             B,      B,      T1
446
447         addic.          J,      J,      -1
448         bgt             ZGEMM_L2_BEGIN
449
/* N even: no single trailing column, leave via L999 (defined outside this chunk). */
450         andi.           T2,     N,      1
451         ble             L999
452
/* Also reached directly when N >> 1 <= 0; continue with the single-column section. */
453 ZGEMM_L2_END:
454
455         b               ZGEMM_L1_BEGIN
456
/* Branch stub to L999; no reference to L999_H1 is visible in this chunk. */
457 L999_H1:
458
459         b               L999
460
/*
 * Single-column section: executed only when bit 0 of N is set, otherwise
 * everything up to ZGEMM_L1_END is skipped. CO and AO start from C and A;
 * unlike the two-column section, C is not advanced afterwards.
 */
461 ZGEMM_L1_BEGIN:
462
463         andi.           T1,     N,      1
464         ble             ZGEMM_L1_END
465         mr              CO,     C
466         mr              AO,     A
/* I = M >> 3 counts the 8-wide tiles; none -> go to the remainder tiles. */
467         srawi.          I,      M,      3
468         ble             ZGEMM_L1x8_END
469
/*
 * 8x1 tile loop, executed I times (see the addic./bgt after SAVE1x8).
 * For each tile BO is reset to B and L = K >> 3 selects the path:
 *   L <= 0 -> ZGEMM_L1x8_SUB0   (fewer than 8 K-steps)
 *   L == 1 -> ZGEMM_L1x8_SUB4   (exactly one group of 8)
 *   L >= 2 -> unrolled LOOP_START / LOOP / LOOP_END sequence
 * The LOAD/KERNEL/SAVE macros are defined outside this chunk.
 */
470 ZGEMM_L1x8_BEGIN:
471
472
473         mr              BO,     B
474         srawi.          L,      K,      3
475         ble             ZGEMM_L1x8_SUB0
476         cmpwi           cr0,    L,      1
477         ble             ZGEMM_L1x8_SUB4
478
/*
 * First group of 8 kernel macros (LOAD1x8_1 + KERNEL1x8_I1, then
 * alternating _2/_1). Each is preceded by dcbt AO, PRE, a cache-touch
 * hint for the block at AO + PRE. Presumably the _1/_2 variants alternate
 * between two register sets in a software pipeline -- confirm in the
 * macro file.
 */
479 ZGEMM_L1x8_LOOP_START:
480
481         dcbt            AO,     PRE
482         LOAD1x8_1
483         dcbt            AO,     PRE
484         KERNEL1x8_I1
485         dcbt            AO,     PRE
486         KERNEL1x8_2
487         dcbt            AO,     PRE
488         KERNEL1x8_1
489         dcbt            AO,     PRE
490         KERNEL1x8_2
491
492         dcbt            AO,     PRE
493         KERNEL1x8_1
494         dcbt            AO,     PRE
495         KERNEL1x8_2
496         dcbt            AO,     PRE
497         KERNEL1x8_1
498         dcbt            AO,     PRE
499         KERNEL1x8_2
500
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
501         addic.          L,      L,      -2
502         ble             ZGEMM_L1x8_LOOP_END
503
504         .align 5
505
/* Steady state: 8 kernel macros per pass, repeated while --L > 0. */
506 ZGEMM_L1x8_LOOP:
507
508         dcbt            AO,     PRE
509         KERNEL1x8_1
510         dcbt            AO,     PRE
511         KERNEL1x8_2
512         dcbt            AO,     PRE
513         KERNEL1x8_1
514         dcbt            AO,     PRE
515         KERNEL1x8_2
516
517         dcbt            AO,     PRE
518         KERNEL1x8_1
519         dcbt            AO,     PRE
520         KERNEL1x8_2
521         dcbt            AO,     PRE
522         KERNEL1x8_1
523         dcbt            AO,     PRE
524         KERNEL1x8_2
525
526         addic.          L,      L,      -1
527         bgt             ZGEMM_L1x8_LOOP
528
/*
 * Last group of 8: ends with KERNEL1x8_E2 instead of KERNEL1x8_2
 * (presumably the pipeline drain that issues no further loads -- confirm).
 */
529 ZGEMM_L1x8_LOOP_END:
530
531         dcbt            AO,     PRE
532         KERNEL1x8_1
533         dcbt            AO,     PRE
534         KERNEL1x8_2
535         dcbt            AO,     PRE
536         KERNEL1x8_1
537         dcbt            AO,     PRE
538         KERNEL1x8_2
539
540         dcbt            AO,     PRE
541         KERNEL1x8_1
542         dcbt            AO,     PRE
543         KERNEL1x8_2
544         dcbt            AO,     PRE
545         KERNEL1x8_1
546         KERNEL1x8_E2
547
548         b               ZGEMM_L1x8_SUB1
549
/*
 * K >> 3 == 1: eight non-pipelined invocations (SUBI1 followed by seven
 * SUB1); only the first four are preceded by a dcbt hint.
 */
550 ZGEMM_L1x8_SUB4:
551
552         dcbt            AO,     PRE
553         KERNEL1x8_SUBI1
554         dcbt            AO,     PRE
555         KERNEL1x8_SUB1
556         dcbt            AO,     PRE
557         KERNEL1x8_SUB1
558         dcbt            AO,     PRE
559         KERNEL1x8_SUB1
560
561         KERNEL1x8_SUB1
562         KERNEL1x8_SUB1
563         KERNEL1x8_SUB1
564         KERNEL1x8_SUB1
565
566         b               ZGEMM_L1x8_SUB1
567
/*
 * K >> 3 <= 0: L = K & 7, then KERNEL1x8_SUBI1 runs once before L is
 * tested; the remaining L - 1 steps go through ZGEMM_L1x8_SUB2.
 * NOTE(review): with K & 7 == 0 on this path (K == 0) one kernel step is
 * still executed; presumably callers never pass K == 0 -- confirm.
 */
568 ZGEMM_L1x8_SUB0:
569
570         andi.           L,      K,      7
571
572         KERNEL1x8_SUBI1
573
574         addic.          L,      L,      -1
575         ble             ZGEMM_L1x8_SAVE
576         b               ZGEMM_L1x8_SUB2
577
/*
 * Remainder after the groups of 8: L = K & 7. The andi. result is never
 * negative, so the ble is taken exactly when the remainder is zero.
 */
578 ZGEMM_L1x8_SUB1:
579
580         andi.           L,      K,      7
581         ble             ZGEMM_L1x8_SAVE
582
/* One KERNEL1x8_SUB1 per remaining count. */
583 ZGEMM_L1x8_SUB2:
584
585         KERNEL1x8_SUB1
586
587         addic.          L,      L,      -1
588         bgt             ZGEMM_L1x8_SUB2
589
/*
 * SAVE1x8 is defined elsewhere (presumably applies alpha, updates C at CO
 * and advances CO -- confirm). Then loop to the next tile while --I > 0.
 */
590 ZGEMM_L1x8_SAVE:
591
592         SAVE1x8
593
594         addic.          I,      I,      -1
595         bgt             ZGEMM_L1x8_BEGIN
596
597 ZGEMM_L1x8_END:
598
/*
 * 4x1 remainder tile, executed at most once for the single column.
 * If M & 7 == 0 there are no leftover rows and all remainder tiles are
 * skipped (branch to ZGEMM_L1x1_END); otherwise this tile runs only when
 * bit 2 of M is set. AO and CO are not reloaded here, so they continue
 * from wherever the preceding code left them.
 */
599 ZGEMM_L1x4_BEGIN:
600
601         andi.           T2,     M,      7
602         ble             ZGEMM_L1x1_END
603
604         andi.           T1,     M,      4
605         ble             ZGEMM_L1x4_END
/* Same K dispatch as the other tiles: L = K >> 3; <= 0 -> SUB0, == 1 -> SUB4. */
606         mr              BO,     B
607         srawi.          L,      K,      3
608         ble             ZGEMM_L1x4_SUB0
609         cmpwi           cr0,    L,      1
610         ble             ZGEMM_L1x4_SUB4
611
/* First group of 8 kernel macros: LOAD1x4_1 + KERNEL1x4_I1, then _2/_1 pairs. */
612 ZGEMM_L1x4_LOOP_START:
613
614         LOAD1x4_1
615         KERNEL1x4_I1
616         KERNEL1x4_2
617         KERNEL1x4_1
618         KERNEL1x4_2
619
620         KERNEL1x4_1
621         KERNEL1x4_2
622         KERNEL1x4_1
623         KERNEL1x4_2
624
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
625         addic.          L,      L,      -2
626         ble             ZGEMM_L1x4_LOOP_END
627
628         .align 5
629
/* Steady state: 8 kernel macros per pass while --L > 0. */
630 ZGEMM_L1x4_LOOP:
631
632         KERNEL1x4_1
633         KERNEL1x4_2
634         KERNEL1x4_1
635         KERNEL1x4_2
636
637         KERNEL1x4_1
638         KERNEL1x4_2
639         KERNEL1x4_1
640         KERNEL1x4_2
641
642         addic.          L,      L,      -1
643         bgt             ZGEMM_L1x4_LOOP
644
/* Last group of 8; ends with KERNEL1x4_E2 (presumably the pipeline drain -- confirm). */
645 ZGEMM_L1x4_LOOP_END:
646
647         KERNEL1x4_1
648         KERNEL1x4_2
649         KERNEL1x4_1
650         KERNEL1x4_2
651
652         KERNEL1x4_1
653         KERNEL1x4_2
654         KERNEL1x4_1
655         KERNEL1x4_E2
656
657         b               ZGEMM_L1x4_SUB1
658
/* K >> 3 == 1: eight non-pipelined invocations (SUBI1 + seven SUB1). */
659 ZGEMM_L1x4_SUB4:
660
661         KERNEL1x4_SUBI1
662         KERNEL1x4_SUB1
663         KERNEL1x4_SUB1
664         KERNEL1x4_SUB1
665
666         KERNEL1x4_SUB1
667         KERNEL1x4_SUB1
668         KERNEL1x4_SUB1
669         KERNEL1x4_SUB1
670
671         b               ZGEMM_L1x4_SUB1
672
/*
 * K >> 3 <= 0: L = K & 7; KERNEL1x4_SUBI1 runs once before L is tested,
 * the remaining L - 1 steps go through ZGEMM_L1x4_SUB2.
 * NOTE(review): K == 0 would still execute one kernel step here; presumably
 * callers never pass K == 0 -- confirm.
 */
673 ZGEMM_L1x4_SUB0:
674
675         andi.           L,      K,      7
676
677         KERNEL1x4_SUBI1
678
679         addic.          L,      L,      -1
680         ble             ZGEMM_L1x4_SAVE
681         b               ZGEMM_L1x4_SUB2
682
/* Remainder after the groups of 8: L = K & 7; zero -> straight to SAVE. */
683 ZGEMM_L1x4_SUB1:
684
685         andi.           L,      K,      7
686         ble             ZGEMM_L1x4_SAVE
687
/* One KERNEL1x4_SUB1 per remaining count. */
688 ZGEMM_L1x4_SUB2:
689
690         KERNEL1x4_SUB1
691
692         addic.          L,      L,      -1
693         bgt             ZGEMM_L1x4_SUB2
694
/* SAVE1x4 is defined elsewhere (presumably stores the tile to C at CO -- confirm). */
695 ZGEMM_L1x4_SAVE:
696
697         SAVE1x4
698
699 ZGEMM_L1x4_END:
700
/*
 * 2x1 remainder tile, executed at most once for the single column: runs
 * only when bit 1 of M is set. AO and CO are not reloaded here, so they
 * continue from wherever the preceding code left them.
 */
701 ZGEMM_L1x2_BEGIN:
702
703
704         andi.           T1,     M,      2
705         ble             ZGEMM_L1x2_END
/* Same K dispatch as the other tiles: L = K >> 3; <= 0 -> SUB0, == 1 -> SUB4. */
706         mr              BO,     B
707         srawi.          L,      K,      3
708         ble             ZGEMM_L1x2_SUB0
709         cmpwi           cr0,    L,      1
710         ble             ZGEMM_L1x2_SUB4
711
/* First group of 8 kernel macros: LOAD1x2_1 + KERNEL1x2_I1, then _2/_1 pairs. */
712 ZGEMM_L1x2_LOOP_START:
713
714         LOAD1x2_1
715         KERNEL1x2_I1
716         KERNEL1x2_2
717         KERNEL1x2_1
718         KERNEL1x2_2
719
720         KERNEL1x2_1
721         KERNEL1x2_2
722         KERNEL1x2_1
723         KERNEL1x2_2
724
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
725         addic.          L,      L,      -2
726         ble             ZGEMM_L1x2_LOOP_END
727
728         .align 5
729
/* Steady state: 8 kernel macros per pass while --L > 0. */
730 ZGEMM_L1x2_LOOP:
731
732         KERNEL1x2_1
733         KERNEL1x2_2
734         KERNEL1x2_1
735         KERNEL1x2_2
736
737         KERNEL1x2_1
738         KERNEL1x2_2
739         KERNEL1x2_1
740         KERNEL1x2_2
741
742         addic.          L,      L,      -1
743         bgt             ZGEMM_L1x2_LOOP
744
/* Last group of 8; ends with KERNEL1x2_E2 (presumably the pipeline drain -- confirm). */
745 ZGEMM_L1x2_LOOP_END:
746
747         KERNEL1x2_1
748         KERNEL1x2_2
749         KERNEL1x2_1
750         KERNEL1x2_2
751
752         KERNEL1x2_1
753         KERNEL1x2_2
754         KERNEL1x2_1
755         KERNEL1x2_E2
756
757         b               ZGEMM_L1x2_SUB1
758
/* K >> 3 == 1: eight non-pipelined invocations (SUBI1 + seven SUB1). */
759 ZGEMM_L1x2_SUB4:
760
761         KERNEL1x2_SUBI1
762         KERNEL1x2_SUB1
763         KERNEL1x2_SUB1
764         KERNEL1x2_SUB1
765
766         KERNEL1x2_SUB1
767         KERNEL1x2_SUB1
768         KERNEL1x2_SUB1
769         KERNEL1x2_SUB1
770
771         b               ZGEMM_L1x2_SUB1
772
/*
 * K >> 3 <= 0: L = K & 7; KERNEL1x2_SUBI1 runs once before L is tested,
 * the remaining L - 1 steps go through ZGEMM_L1x2_SUB2.
 * NOTE(review): K == 0 would still execute one kernel step here; presumably
 * callers never pass K == 0 -- confirm.
 */
773 ZGEMM_L1x2_SUB0:
774
775         andi.           L,      K,      7
776
777         KERNEL1x2_SUBI1
778
779         addic.          L,      L,      -1
780         ble             ZGEMM_L1x2_SAVE
781         b               ZGEMM_L1x2_SUB2
782
/* Remainder after the groups of 8: L = K & 7; zero -> straight to SAVE. */
783 ZGEMM_L1x2_SUB1:
784
785         andi.           L,      K,      7
786         ble             ZGEMM_L1x2_SAVE
787
/* One KERNEL1x2_SUB1 per remaining count. */
788 ZGEMM_L1x2_SUB2:
789
790         KERNEL1x2_SUB1
791
792         addic.          L,      L,      -1
793         bgt             ZGEMM_L1x2_SUB2
794
/* SAVE1x2 is defined elsewhere (presumably stores the tile to C at CO -- confirm). */
795 ZGEMM_L1x2_SAVE:
796
797         SAVE1x2
798
799 ZGEMM_L1x2_END:
800
/*
 * 1x1 remainder tile, executed at most once for the single column: runs
 * only when bit 0 of M is set. AO and CO are not reloaded here, so they
 * continue from wherever the preceding code left them.
 */
801 ZGEMM_L1x1_BEGIN:
802
803
804         andi.           T1,     M,      1
805         ble             ZGEMM_L1x1_END
/* Same K dispatch as the other tiles: L = K >> 3; <= 0 -> SUB0, == 1 -> SUB4. */
806         mr              BO,     B
807         srawi.          L,      K,      3
808         ble             ZGEMM_L1x1_SUB0
809         cmpwi           cr0,    L,      1
810         ble             ZGEMM_L1x1_SUB4
811
/* First group of 8 kernel macros: LOAD1x1_1 + KERNEL1x1_I1, then _2/_1 pairs. */
812 ZGEMM_L1x1_LOOP_START:
813
814         LOAD1x1_1
815         KERNEL1x1_I1
816         KERNEL1x1_2
817         KERNEL1x1_1
818         KERNEL1x1_2
819
820         KERNEL1x1_1
821         KERNEL1x1_2
822         KERNEL1x1_1
823         KERNEL1x1_2
824
/* L -= 2: one group was just issued and one is always issued by LOOP_END. */
825         addic.          L,      L,      -2
826         ble             ZGEMM_L1x1_LOOP_END
827
828         .align 5
829
/* Steady state: 8 kernel macros per pass while --L > 0. */
830 ZGEMM_L1x1_LOOP:
831
832         KERNEL1x1_1
833         KERNEL1x1_2
834         KERNEL1x1_1
835         KERNEL1x1_2
836
837         KERNEL1x1_1
838         KERNEL1x1_2
839         KERNEL1x1_1
840         KERNEL1x1_2
841
842         addic.          L,      L,      -1
843         bgt             ZGEMM_L1x1_LOOP
844
/* Last group of 8; ends with KERNEL1x1_E2 (presumably the pipeline drain -- confirm). */
845 ZGEMM_L1x1_LOOP_END:
846
847         KERNEL1x1_1
848         KERNEL1x1_2
849         KERNEL1x1_1
850         KERNEL1x1_2
851
852         KERNEL1x1_1
853         KERNEL1x1_2
854         KERNEL1x1_1
855         KERNEL1x1_E2
856
857         b               ZGEMM_L1x1_SUB1
858
/* K >> 3 == 1: eight non-pipelined invocations (SUBI1 + seven SUB1). */
859 ZGEMM_L1x1_SUB4:
860
861         KERNEL1x1_SUBI1
862         KERNEL1x1_SUB1
863         KERNEL1x1_SUB1
864         KERNEL1x1_SUB1
865
866         KERNEL1x1_SUB1
867         KERNEL1x1_SUB1
868         KERNEL1x1_SUB1
869         KERNEL1x1_SUB1
870
871         b               ZGEMM_L1x1_SUB1
872
/*
 * K >> 3 <= 0: L = K & 7; KERNEL1x1_SUBI1 runs once before L is tested,
 * the remaining L - 1 steps go through ZGEMM_L1x1_SUB2.
 * NOTE(review): K == 0 would still execute one kernel step here; presumably
 * callers never pass K == 0 -- confirm.
 */
873 ZGEMM_L1x1_SUB0:
874
875         andi.           L,      K,      7
876
877         KERNEL1x1_SUBI1
878
879         addic.          L,      L,      -1
880         ble             ZGEMM_L1x1_SAVE
881         b               ZGEMM_L1x1_SUB2
882
/* Remainder after the groups of 8: L = K & 7; zero -> straight to SAVE. */
883 ZGEMM_L1x1_SUB1:
884
885         andi.           L,      K,      7
886         ble             ZGEMM_L1x1_SAVE
887
/* One KERNEL1x1_SUB1 per remaining count. */
888 ZGEMM_L1x1_SUB2:
889
890         KERNEL1x1_SUB1
891
892         addic.          L,      L,      -1
893         bgt             ZGEMM_L1x1_SUB2
894
/* SAVE1x1 is defined elsewhere (presumably stores the tile to C at CO -- confirm). */
895 ZGEMM_L1x1_SAVE:
896
897         SAVE1x1
898
/* Join point for the row-remainder skip, then the end of the single-column section. */
899 ZGEMM_L1x1_END:
900
901 ZGEMM_L1_END: