1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
/* disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
64 #if defined(_AIX) || defined(__APPLE__)
65 #if !defined(__64BIT__) && defined(DOUBLE)
/* Prefetch distance tuning table.                                    */
/* PREFETCHSIZE_A and PREFETCHSIZE_C are scaled by SIZE and loaded    */
/* into the PREA and PREC registers during setup                      */
/* (li PREA, PREFETCHSIZE_A * SIZE / li PREC, PREFETCHSIZE_C * SIZE). */
/* Several alternative pairs are listed below; only one pair is       */
/* meant to be active per build.                                      */
/* NOTE(review): apart from the PPC440 / PPC440FP2 test, the          */
/* conditionals that select each pair are not visible in this         */
/* excerpt -- confirm every pair sits under its own target guard so   */
/* the macros are never redefined.                                    */
143 #define PREFETCHSIZE_A 24
144 #define PREFETCHSIZE_C 16
147 #if defined(PPC440) || defined(PPC440FP2)
148 #define PREFETCHSIZE_A 24
149 #define PREFETCHSIZE_C 16
153 #define PREFETCHSIZE_A 16
154 #define PREFETCHSIZE_C 16
158 #define PREFETCHSIZE_A 16
159 #define PREFETCHSIZE_C 16
163 #define PREFETCHSIZE_A 16
164 #define PREFETCHSIZE_C 16
168 #define PREFETCHSIZE_A 40
169 #define PREFETCHSIZE_C 24
173 #define PREFETCHSIZE_A 96
174 #define PREFETCHSIZE_C 40
178 #define PREFETCHSIZE_A 96
179 #define PREFETCHSIZE_C 40
/* Stack frame layout.                                                */
/* STACKSIZE is the amount subtracted from SP when the frame is       */
/* opened (addi SP, SP, -STACKSIZE); it is also added back when       */
/* reading stack-passed arguments at FRAMESLOT(n) + STACKSIZE(SP).    */
/* ALPHA and FZERO are SP-relative offsets that fall inside the       */
/* frame, 8 bytes apart.                                              */
/* Two layouts are given (224-byte and 280-byte frames).              */
/* NOTE(review): the conditional choosing between them is not         */
/* visible in this excerpt -- presumably 32-bit vs 64-bit builds;     */
/* confirm.                                                           */
185 #define STACKSIZE 224
186 #define ALPHA 200(SP)
187 #define FZERO 208(SP)
189 #define STACKSIZE 280
190 #define ALPHA 256(SP)
191 #define FZERO 264(SP)
197 addi SP, SP, -STACKSIZE
256 lwz INCY, FRAMESLOT(0) + STACKSIZE(SP)
257 lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP)
259 ld Y, FRAMESLOT(0) + STACKSIZE(SP)
260 ld INCY, FRAMESLOT(1) + STACKSIZE(SP)
261 ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
265 #if defined(_AIX) || defined(__APPLE__)
268 lwz INCX, FRAMESLOT(0) + STACKSIZE(SP)
269 lwz Y, FRAMESLOT(1) + STACKSIZE(SP)
270 lwz INCY, FRAMESLOT(2) + STACKSIZE(SP)
271 lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP)
273 lwz Y, FRAMESLOT(0) + STACKSIZE(SP)
274 lwz INCY, FRAMESLOT(1) + STACKSIZE(SP)
275 lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
278 ld Y, FRAMESLOT(0) + STACKSIZE(SP)
279 ld INCY, FRAMESLOT(1) + STACKSIZE(SP)
280 ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
287 slwi LDA8, LDA, BASE_SHIFT + 3
288 slwi LDA, LDA, BASE_SHIFT
289 slwi INCX, INCX, BASE_SHIFT
290 slwi INCY, INCY, BASE_SHIFT
292 li PREA, PREFETCHSIZE_A * SIZE
293 li PREC, PREFETCHSIZE_C * SIZE
304 cmpi cr0, 0, INCY, SIZE
316 STFD f0, 0 * SIZE(Y1)
317 STFD f0, 1 * SIZE(Y1)
318 STFD f0, 2 * SIZE(Y1)
319 STFD f0, 3 * SIZE(Y1)
320 STFD f0, 4 * SIZE(Y1)
321 STFD f0, 5 * SIZE(Y1)
322 STFD f0, 6 * SIZE(Y1)
323 STFD f0, 7 * SIZE(Y1)
324 addi Y1, Y1, 8 * SIZE
334 LFD alpha1, 0 * SIZE(X)
336 LFD alpha2, 0 * SIZE(X)
338 LFD alpha3, 0 * SIZE(X)
340 LFD alpha4, 0 * SIZE(X)
342 LFD alpha5, 0 * SIZE(X)
344 LFD alpha6, 0 * SIZE(X)
346 LFD alpha7, 0 * SIZE(X)
348 LFD alpha8, 0 * SIZE(X)
351 FMUL alpha1, alpha, alpha1
352 FMUL alpha2, alpha, alpha2
353 FMUL alpha3, alpha, alpha3
354 FMUL alpha4, alpha, alpha4
355 FMUL alpha5, alpha, alpha5
356 FMUL alpha6, alpha, alpha6
357 FMUL alpha7, alpha, alpha7
358 FMUL alpha8, alpha, alpha8
376 LFD y01, 0 * SIZE(Y1)
377 LFD y02, 1 * SIZE(Y1)
378 LFD y03, 2 * SIZE(Y1)
379 LFD y04, 3 * SIZE(Y1)
380 LFD y05, 4 * SIZE(Y1)
381 LFD y06, 5 * SIZE(Y1)
382 LFD y07, 6 * SIZE(Y1)
383 LFD y08, 7 * SIZE(Y1)
385 LFD a1, 0 * SIZE(AO1)
386 LFD a2, 1 * SIZE(AO1)
387 LFD a3, 2 * SIZE(AO1)
388 LFD a4, 3 * SIZE(AO1)
389 LFD a5, 4 * SIZE(AO1)
390 LFD a6, 5 * SIZE(AO1)
391 LFD a7, 6 * SIZE(AO1)
392 LFD a8, 7 * SIZE(AO1)
394 LFD y09, 8 * SIZE(Y1)
395 LFD y10, 9 * SIZE(Y1)
396 LFD y11, 10 * SIZE(Y1)
397 LFD y12, 11 * SIZE(Y1)
398 LFD y13, 12 * SIZE(Y1)
399 LFD y14, 13 * SIZE(Y1)
400 LFD y15, 14 * SIZE(Y1)
401 LFD y16, 15 * SIZE(Y1)
403 FMADD y01, alpha1, a1, y01
404 FMADD y02, alpha1, a2, y02
405 FMADD y03, alpha1, a3, y03
406 FMADD y04, alpha1, a4, y04
408 LFD a1, 8 * SIZE(AO1)
409 LFD a2, 9 * SIZE(AO1)
410 LFD a3, 10 * SIZE(AO1)
411 LFD a4, 11 * SIZE(AO1)
413 FMADD y05, alpha1, a5, y05
414 FMADD y06, alpha1, a6, y06
415 FMADD y07, alpha1, a7, y07
416 FMADD y08, alpha1, a8, y08
418 LFD a5, 12 * SIZE(AO1)
419 LFD a6, 13 * SIZE(AO1)
420 LFD a7, 14 * SIZE(AO1)
421 LFD a8, 15 * SIZE(AO1)
423 addi AO1, AO1, 16 * SIZE
428 FMADD y09, alpha1, a1, y09
429 FMADD y10, alpha1, a2, y10
430 FMADD y11, alpha1, a3, y11
431 FMADD y12, alpha1, a4, y12
433 LFD a1, 0 * SIZE(AO2)
434 LFD a2, 1 * SIZE(AO2)
435 LFD a3, 2 * SIZE(AO2)
436 LFD a4, 3 * SIZE(AO2)
438 FMADD y13, alpha1, a5, y13
439 FMADD y14, alpha1, a6, y14
440 FMADD y15, alpha1, a7, y15
441 FMADD y16, alpha1, a8, y16
443 LFD a5, 4 * SIZE(AO2)
444 LFD a6, 5 * SIZE(AO2)
445 LFD a7, 6 * SIZE(AO2)
446 LFD a8, 7 * SIZE(AO2)
448 FMADD y01, alpha2, a1, y01
449 FMADD y02, alpha2, a2, y02
450 FMADD y03, alpha2, a3, y03
451 FMADD y04, alpha2, a4, y04
453 LFD a1, 8 * SIZE(AO2)
454 LFD a2, 9 * SIZE(AO2)
455 LFD a3, 10 * SIZE(AO2)
456 LFD a4, 11 * SIZE(AO2)
458 FMADD y05, alpha2, a5, y05
459 FMADD y06, alpha2, a6, y06
460 FMADD y07, alpha2, a7, y07
461 FMADD y08, alpha2, a8, y08
463 LFD a5, 12 * SIZE(AO2)
464 LFD a6, 13 * SIZE(AO2)
465 LFD a7, 14 * SIZE(AO2)
466 LFD a8, 15 * SIZE(AO2)
468 addi AO2, AO2, 16 * SIZE
473 FMADD y09, alpha2, a1, y09
474 FMADD y10, alpha2, a2, y10
475 FMADD y11, alpha2, a3, y11
476 FMADD y12, alpha2, a4, y12
478 LFD a1, 0 * SIZE(AO3)
479 LFD a2, 1 * SIZE(AO3)
480 LFD a3, 2 * SIZE(AO3)
481 LFD a4, 3 * SIZE(AO3)
483 FMADD y13, alpha2, a5, y13
484 FMADD y14, alpha2, a6, y14
485 FMADD y15, alpha2, a7, y15
486 FMADD y16, alpha2, a8, y16
488 LFD a5, 4 * SIZE(AO3)
489 LFD a6, 5 * SIZE(AO3)
490 LFD a7, 6 * SIZE(AO3)
491 LFD a8, 7 * SIZE(AO3)
493 FMADD y01, alpha3, a1, y01
494 FMADD y02, alpha3, a2, y02
495 FMADD y03, alpha3, a3, y03
496 FMADD y04, alpha3, a4, y04
498 LFD a1, 8 * SIZE(AO3)
499 LFD a2, 9 * SIZE(AO3)
500 LFD a3, 10 * SIZE(AO3)
501 LFD a4, 11 * SIZE(AO3)
503 FMADD y05, alpha3, a5, y05
504 FMADD y06, alpha3, a6, y06
505 FMADD y07, alpha3, a7, y07
506 FMADD y08, alpha3, a8, y08
508 LFD a5, 12 * SIZE(AO3)
509 LFD a6, 13 * SIZE(AO3)
510 LFD a7, 14 * SIZE(AO3)
511 LFD a8, 15 * SIZE(AO3)
513 addi AO3, AO3, 16 * SIZE
518 FMADD y09, alpha3, a1, y09
519 FMADD y10, alpha3, a2, y10
520 FMADD y11, alpha3, a3, y11
521 FMADD y12, alpha3, a4, y12
523 LFD a1, 0 * SIZE(AO4)
524 LFD a2, 1 * SIZE(AO4)
525 LFD a3, 2 * SIZE(AO4)
526 LFD a4, 3 * SIZE(AO4)
528 FMADD y13, alpha3, a5, y13
529 FMADD y14, alpha3, a6, y14
530 FMADD y15, alpha3, a7, y15
531 FMADD y16, alpha3, a8, y16
533 LFD a5, 4 * SIZE(AO4)
534 LFD a6, 5 * SIZE(AO4)
535 LFD a7, 6 * SIZE(AO4)
536 LFD a8, 7 * SIZE(AO4)
538 FMADD y01, alpha4, a1, y01
539 FMADD y02, alpha4, a2, y02
540 FMADD y03, alpha4, a3, y03
541 FMADD y04, alpha4, a4, y04
543 LFD a1, 8 * SIZE(AO4)
544 LFD a2, 9 * SIZE(AO4)
545 LFD a3, 10 * SIZE(AO4)
546 LFD a4, 11 * SIZE(AO4)
548 FMADD y05, alpha4, a5, y05
549 FMADD y06, alpha4, a6, y06
550 FMADD y07, alpha4, a7, y07
551 FMADD y08, alpha4, a8, y08
553 LFD a5, 12 * SIZE(AO4)
554 LFD a6, 13 * SIZE(AO4)
555 LFD a7, 14 * SIZE(AO4)
556 LFD a8, 15 * SIZE(AO4)
558 addi AO4, AO4, 16 * SIZE
563 FMADD y09, alpha4, a1, y09
564 FMADD y10, alpha4, a2, y10
565 FMADD y11, alpha4, a3, y11
566 FMADD y12, alpha4, a4, y12
568 LFD a1, 0 * SIZE(AO5)
569 LFD a2, 1 * SIZE(AO5)
570 LFD a3, 2 * SIZE(AO5)
571 LFD a4, 3 * SIZE(AO5)
573 FMADD y13, alpha4, a5, y13
574 FMADD y14, alpha4, a6, y14
575 FMADD y15, alpha4, a7, y15
576 FMADD y16, alpha4, a8, y16
578 LFD a5, 4 * SIZE(AO5)
579 LFD a6, 5 * SIZE(AO5)
580 LFD a7, 6 * SIZE(AO5)
581 LFD a8, 7 * SIZE(AO5)
583 FMADD y01, alpha5, a1, y01
584 FMADD y02, alpha5, a2, y02
585 FMADD y03, alpha5, a3, y03
586 FMADD y04, alpha5, a4, y04
588 LFD a1, 8 * SIZE(AO5)
589 LFD a2, 9 * SIZE(AO5)
590 LFD a3, 10 * SIZE(AO5)
591 LFD a4, 11 * SIZE(AO5)
593 FMADD y05, alpha5, a5, y05
594 FMADD y06, alpha5, a6, y06
595 FMADD y07, alpha5, a7, y07
596 FMADD y08, alpha5, a8, y08
598 LFD a5, 12 * SIZE(AO5)
599 LFD a6, 13 * SIZE(AO5)
600 LFD a7, 14 * SIZE(AO5)
601 LFD a8, 15 * SIZE(AO5)
603 addi AO5, AO5, 16 * SIZE
608 FMADD y09, alpha5, a1, y09
609 FMADD y10, alpha5, a2, y10
610 FMADD y11, alpha5, a3, y11
611 FMADD y12, alpha5, a4, y12
613 LFD a1, 0 * SIZE(AO6)
614 LFD a2, 1 * SIZE(AO6)
615 LFD a3, 2 * SIZE(AO6)
616 LFD a4, 3 * SIZE(AO6)
618 FMADD y13, alpha5, a5, y13
619 FMADD y14, alpha5, a6, y14
620 FMADD y15, alpha5, a7, y15
621 FMADD y16, alpha5, a8, y16
623 LFD a5, 4 * SIZE(AO6)
624 LFD a6, 5 * SIZE(AO6)
625 LFD a7, 6 * SIZE(AO6)
626 LFD a8, 7 * SIZE(AO6)
628 FMADD y01, alpha6, a1, y01
629 FMADD y02, alpha6, a2, y02
630 FMADD y03, alpha6, a3, y03
631 FMADD y04, alpha6, a4, y04
633 LFD a1, 8 * SIZE(AO6)
634 LFD a2, 9 * SIZE(AO6)
635 LFD a3, 10 * SIZE(AO6)
636 LFD a4, 11 * SIZE(AO6)
638 FMADD y05, alpha6, a5, y05
639 FMADD y06, alpha6, a6, y06
640 FMADD y07, alpha6, a7, y07
641 FMADD y08, alpha6, a8, y08
643 LFD a5, 12 * SIZE(AO6)
644 LFD a6, 13 * SIZE(AO6)
645 LFD a7, 14 * SIZE(AO6)
646 LFD a8, 15 * SIZE(AO6)
648 addi AO6, AO6, 16 * SIZE
653 FMADD y09, alpha6, a1, y09
654 FMADD y10, alpha6, a2, y10
655 FMADD y11, alpha6, a3, y11
656 FMADD y12, alpha6, a4, y12
658 LFD a1, 0 * SIZE(AO7)
659 LFD a2, 1 * SIZE(AO7)
660 LFD a3, 2 * SIZE(AO7)
661 LFD a4, 3 * SIZE(AO7)
663 FMADD y13, alpha6, a5, y13
664 FMADD y14, alpha6, a6, y14
665 FMADD y15, alpha6, a7, y15
666 FMADD y16, alpha6, a8, y16
668 LFD a5, 4 * SIZE(AO7)
669 LFD a6, 5 * SIZE(AO7)
670 LFD a7, 6 * SIZE(AO7)
671 LFD a8, 7 * SIZE(AO7)
673 FMADD y01, alpha7, a1, y01
674 FMADD y02, alpha7, a2, y02
675 FMADD y03, alpha7, a3, y03
676 FMADD y04, alpha7, a4, y04
678 LFD a1, 8 * SIZE(AO7)
679 LFD a2, 9 * SIZE(AO7)
680 LFD a3, 10 * SIZE(AO7)
681 LFD a4, 11 * SIZE(AO7)
683 FMADD y05, alpha7, a5, y05
684 FMADD y06, alpha7, a6, y06
685 FMADD y07, alpha7, a7, y07
686 FMADD y08, alpha7, a8, y08
688 LFD a5, 12 * SIZE(AO7)
689 LFD a6, 13 * SIZE(AO7)
690 LFD a7, 14 * SIZE(AO7)
691 LFD a8, 15 * SIZE(AO7)
693 addi AO7, AO7, 16 * SIZE
698 FMADD y09, alpha7, a1, y09
699 FMADD y10, alpha7, a2, y10
700 FMADD y11, alpha7, a3, y11
701 FMADD y12, alpha7, a4, y12
703 LFD a1, 0 * SIZE(AO8)
704 LFD a2, 1 * SIZE(AO8)
705 LFD a3, 2 * SIZE(AO8)
706 LFD a4, 3 * SIZE(AO8)
708 FMADD y13, alpha7, a5, y13
709 FMADD y14, alpha7, a6, y14
710 FMADD y15, alpha7, a7, y15
711 FMADD y16, alpha7, a8, y16
713 LFD a5, 4 * SIZE(AO8)
714 LFD a6, 5 * SIZE(AO8)
715 LFD a7, 6 * SIZE(AO8)
716 LFD a8, 7 * SIZE(AO8)
718 FMADD y01, alpha8, a1, y01
719 FMADD y02, alpha8, a2, y02
720 FMADD y03, alpha8, a3, y03
721 FMADD y04, alpha8, a4, y04
723 LFD a1, 8 * SIZE(AO8)
724 LFD a2, 9 * SIZE(AO8)
725 LFD a3, 10 * SIZE(AO8)
726 LFD a4, 11 * SIZE(AO8)
728 FMADD y05, alpha8, a5, y05
729 FMADD y06, alpha8, a6, y06
730 FMADD y07, alpha8, a7, y07
731 FMADD y08, alpha8, a8, y08
733 LFD a5, 12 * SIZE(AO8)
734 LFD a6, 13 * SIZE(AO8)
735 LFD a7, 14 * SIZE(AO8)
736 LFD a8, 15 * SIZE(AO8)
738 addi AO8, AO8, 16 * SIZE
743 FMADD y09, alpha8, a1, y09
744 FMADD y10, alpha8, a2, y10
745 FMADD y11, alpha8, a3, y11
746 FMADD y12, alpha8, a4, y12
748 LFD a1, 0 * SIZE(AO1)
749 LFD a2, 1 * SIZE(AO1)
750 LFD a3, 2 * SIZE(AO1)
751 LFD a4, 3 * SIZE(AO1)
753 FMADD y13, alpha8, a5, y13
754 FMADD y14, alpha8, a6, y14
755 FMADD y15, alpha8, a7, y15
756 FMADD y16, alpha8, a8, y16
758 LFD a5, 4 * SIZE(AO1)
759 LFD a6, 5 * SIZE(AO1)
760 LFD a7, 6 * SIZE(AO1)
761 LFD a8, 7 * SIZE(AO1)
763 STFD y01, 0 * SIZE(Y1)
764 STFD y02, 1 * SIZE(Y1)
765 STFD y03, 2 * SIZE(Y1)
766 STFD y04, 3 * SIZE(Y1)
768 LFD y01, 16 * SIZE(Y1)
769 LFD y02, 17 * SIZE(Y1)
770 LFD y03, 18 * SIZE(Y1)
771 LFD y04, 19 * SIZE(Y1)
778 FMADD y01, alpha1, a1, y01
779 FMADD y02, alpha1, a2, y02
780 FMADD y03, alpha1, a3, y03
781 FMADD y04, alpha1, a4, y04
783 LFD a1, 8 * SIZE(AO1)
784 LFD a2, 9 * SIZE(AO1)
785 LFD a3, 10 * SIZE(AO1)
786 LFD a4, 11 * SIZE(AO1)
788 STFD y05, 4 * SIZE(Y1)
789 STFD y06, 5 * SIZE(Y1)
790 STFD y07, 6 * SIZE(Y1)
791 STFD y08, 7 * SIZE(Y1)
793 LFD y05, 20 * SIZE(Y1)
794 LFD y06, 21 * SIZE(Y1)
795 LFD y07, 22 * SIZE(Y1)
796 LFD y08, 23 * SIZE(Y1)
798 FMADD y05, alpha1, a5, y05
799 FMADD y06, alpha1, a6, y06
800 FMADD y07, alpha1, a7, y07
801 FMADD y08, alpha1, a8, y08
803 LFD a5, 12 * SIZE(AO1)
804 LFD a6, 13 * SIZE(AO1)
805 LFD a7, 14 * SIZE(AO1)
806 LFD a8, 15 * SIZE(AO1)
808 STFD y09, 8 * SIZE(Y1)
809 STFD y10, 9 * SIZE(Y1)
810 STFD y11, 10 * SIZE(Y1)
811 STFD y12, 11 * SIZE(Y1)
813 LFD y09, 24 * SIZE(Y1)
814 LFD y10, 25 * SIZE(Y1)
815 LFD y11, 26 * SIZE(Y1)
816 LFD y12, 27 * SIZE(Y1)
818 FMADD y09, alpha1, a1, y09
819 FMADD y10, alpha1, a2, y10
820 FMADD y11, alpha1, a3, y11
821 FMADD y12, alpha1, a4, y12
823 LFD a1, 0 * SIZE(AO2)
824 LFD a2, 1 * SIZE(AO2)
825 LFD a3, 2 * SIZE(AO2)
826 LFD a4, 3 * SIZE(AO2)
828 STFD y13, 12 * SIZE(Y1)
829 STFD y14, 13 * SIZE(Y1)
830 STFD y15, 14 * SIZE(Y1)
831 STFD y16, 15 * SIZE(Y1)
833 LFD y13, 28 * SIZE(Y1)
834 LFD y14, 29 * SIZE(Y1)
835 LFD y15, 30 * SIZE(Y1)
836 LFD y16, 31 * SIZE(Y1)
838 FMADD y13, alpha1, a5, y13
839 FMADD y14, alpha1, a6, y14
840 FMADD y15, alpha1, a7, y15
841 FMADD y16, alpha1, a8, y16
843 LFD a5, 4 * SIZE(AO2)
844 LFD a6, 5 * SIZE(AO2)
845 LFD a7, 6 * SIZE(AO2)
846 LFD a8, 7 * SIZE(AO2)
848 FMADD y01, alpha2, a1, y01
849 FMADD y02, alpha2, a2, y02
850 FMADD y03, alpha2, a3, y03
851 FMADD y04, alpha2, a4, y04
853 LFD a1, 8 * SIZE(AO2)
854 LFD a2, 9 * SIZE(AO2)
855 LFD a3, 10 * SIZE(AO2)
856 LFD a4, 11 * SIZE(AO2)
858 FMADD y05, alpha2, a5, y05
859 FMADD y06, alpha2, a6, y06
860 FMADD y07, alpha2, a7, y07
861 FMADD y08, alpha2, a8, y08
863 LFD a5, 12 * SIZE(AO2)
864 LFD a6, 13 * SIZE(AO2)
865 LFD a7, 14 * SIZE(AO2)
866 LFD a8, 15 * SIZE(AO2)
868 FMADD y09, alpha2, a1, y09
869 FMADD y10, alpha2, a2, y10
870 FMADD y11, alpha2, a3, y11
871 FMADD y12, alpha2, a4, y12
873 LFD a1, 0 * SIZE(AO3)
874 LFD a2, 1 * SIZE(AO3)
875 LFD a3, 2 * SIZE(AO3)
876 LFD a4, 3 * SIZE(AO3)
878 FMADD y13, alpha2, a5, y13
879 FMADD y14, alpha2, a6, y14
880 FMADD y15, alpha2, a7, y15
881 FMADD y16, alpha2, a8, y16
883 LFD a5, 4 * SIZE(AO3)
884 LFD a6, 5 * SIZE(AO3)
885 LFD a7, 6 * SIZE(AO3)
886 LFD a8, 7 * SIZE(AO3)
888 FMADD y01, alpha3, a1, y01
889 FMADD y02, alpha3, a2, y02
890 FMADD y03, alpha3, a3, y03
891 FMADD y04, alpha3, a4, y04
893 LFD a1, 8 * SIZE(AO3)
894 LFD a2, 9 * SIZE(AO3)
895 LFD a3, 10 * SIZE(AO3)
896 LFD a4, 11 * SIZE(AO3)
898 FMADD y05, alpha3, a5, y05
899 FMADD y06, alpha3, a6, y06
900 FMADD y07, alpha3, a7, y07
901 FMADD y08, alpha3, a8, y08
903 LFD a5, 12 * SIZE(AO3)
904 LFD a6, 13 * SIZE(AO3)
905 LFD a7, 14 * SIZE(AO3)
906 LFD a8, 15 * SIZE(AO3)
908 FMADD y09, alpha3, a1, y09
909 FMADD y10, alpha3, a2, y10
910 FMADD y11, alpha3, a3, y11
911 FMADD y12, alpha3, a4, y12
913 LFD a1, 0 * SIZE(AO4)
914 LFD a2, 1 * SIZE(AO4)
915 LFD a3, 2 * SIZE(AO4)
916 LFD a4, 3 * SIZE(AO4)
918 FMADD y13, alpha3, a5, y13
919 FMADD y14, alpha3, a6, y14
920 FMADD y15, alpha3, a7, y15
921 FMADD y16, alpha3, a8, y16
923 LFD a5, 4 * SIZE(AO4)
924 LFD a6, 5 * SIZE(AO4)
925 LFD a7, 6 * SIZE(AO4)
926 LFD a8, 7 * SIZE(AO4)
928 FMADD y01, alpha4, a1, y01
929 FMADD y02, alpha4, a2, y02
930 FMADD y03, alpha4, a3, y03
931 FMADD y04, alpha4, a4, y04
933 LFD a1, 8 * SIZE(AO4)
934 LFD a2, 9 * SIZE(AO4)
935 LFD a3, 10 * SIZE(AO4)
936 LFD a4, 11 * SIZE(AO4)
938 FMADD y05, alpha4, a5, y05
939 FMADD y06, alpha4, a6, y06
940 FMADD y07, alpha4, a7, y07
941 FMADD y08, alpha4, a8, y08
943 LFD a5, 12 * SIZE(AO4)
944 LFD a6, 13 * SIZE(AO4)
945 LFD a7, 14 * SIZE(AO4)
946 LFD a8, 15 * SIZE(AO4)
948 addi AO1, AO1, 16 * SIZE
949 addi AO2, AO2, 16 * SIZE
950 addi AO3, AO3, 16 * SIZE
951 addi AO4, AO4, 16 * SIZE
958 FMADD y09, alpha4, a1, y09
959 FMADD y10, alpha4, a2, y10
960 FMADD y11, alpha4, a3, y11
961 FMADD y12, alpha4, a4, y12
963 LFD a1, 0 * SIZE(AO5)
964 LFD a2, 1 * SIZE(AO5)
965 LFD a3, 2 * SIZE(AO5)
966 LFD a4, 3 * SIZE(AO5)
968 FMADD y13, alpha4, a5, y13
969 FMADD y14, alpha4, a6, y14
970 FMADD y15, alpha4, a7, y15
971 FMADD y16, alpha4, a8, y16
973 LFD a5, 4 * SIZE(AO5)
974 LFD a6, 5 * SIZE(AO5)
975 LFD a7, 6 * SIZE(AO5)
976 LFD a8, 7 * SIZE(AO5)
978 FMADD y01, alpha5, a1, y01
979 FMADD y02, alpha5, a2, y02
980 FMADD y03, alpha5, a3, y03
981 FMADD y04, alpha5, a4, y04
983 LFD a1, 8 * SIZE(AO5)
984 LFD a2, 9 * SIZE(AO5)
985 LFD a3, 10 * SIZE(AO5)
986 LFD a4, 11 * SIZE(AO5)
988 FMADD y05, alpha5, a5, y05
989 FMADD y06, alpha5, a6, y06
990 FMADD y07, alpha5, a7, y07
991 FMADD y08, alpha5, a8, y08
993 LFD a5, 12 * SIZE(AO5)
994 LFD a6, 13 * SIZE(AO5)
995 LFD a7, 14 * SIZE(AO5)
996 LFD a8, 15 * SIZE(AO5)
998 FMADD y09, alpha5, a1, y09
999 FMADD y10, alpha5, a2, y10
1000 FMADD y11, alpha5, a3, y11
1001 FMADD y12, alpha5, a4, y12
1003 LFD a1, 0 * SIZE(AO6)
1004 LFD a2, 1 * SIZE(AO6)
1005 LFD a3, 2 * SIZE(AO6)
1006 LFD a4, 3 * SIZE(AO6)
1008 FMADD y13, alpha5, a5, y13
1009 FMADD y14, alpha5, a6, y14
1010 FMADD y15, alpha5, a7, y15
1011 FMADD y16, alpha5, a8, y16
1013 LFD a5, 4 * SIZE(AO6)
1014 LFD a6, 5 * SIZE(AO6)
1015 LFD a7, 6 * SIZE(AO6)
1016 LFD a8, 7 * SIZE(AO6)
1018 FMADD y01, alpha6, a1, y01
1019 FMADD y02, alpha6, a2, y02
1020 FMADD y03, alpha6, a3, y03
1021 FMADD y04, alpha6, a4, y04
1023 LFD a1, 8 * SIZE(AO6)
1024 LFD a2, 9 * SIZE(AO6)
1025 LFD a3, 10 * SIZE(AO6)
1026 LFD a4, 11 * SIZE(AO6)
1028 FMADD y05, alpha6, a5, y05
1029 FMADD y06, alpha6, a6, y06
1030 FMADD y07, alpha6, a7, y07
1031 FMADD y08, alpha6, a8, y08
1033 LFD a5, 12 * SIZE(AO6)
1034 LFD a6, 13 * SIZE(AO6)
1035 LFD a7, 14 * SIZE(AO6)
1036 LFD a8, 15 * SIZE(AO6)
1038 FMADD y09, alpha6, a1, y09
1039 FMADD y10, alpha6, a2, y10
1040 FMADD y11, alpha6, a3, y11
1041 FMADD y12, alpha6, a4, y12
1043 LFD a1, 0 * SIZE(AO7)
1044 LFD a2, 1 * SIZE(AO7)
1045 LFD a3, 2 * SIZE(AO7)
1046 LFD a4, 3 * SIZE(AO7)
1048 FMADD y13, alpha6, a5, y13
1049 FMADD y14, alpha6, a6, y14
1050 FMADD y15, alpha6, a7, y15
1051 FMADD y16, alpha6, a8, y16
1053 LFD a5, 4 * SIZE(AO7)
1054 LFD a6, 5 * SIZE(AO7)
1055 LFD a7, 6 * SIZE(AO7)
1056 LFD a8, 7 * SIZE(AO7)
1058 FMADD y01, alpha7, a1, y01
1059 FMADD y02, alpha7, a2, y02
1060 FMADD y03, alpha7, a3, y03
1061 FMADD y04, alpha7, a4, y04
1063 LFD a1, 8 * SIZE(AO7)
1064 LFD a2, 9 * SIZE(AO7)
1065 LFD a3, 10 * SIZE(AO7)
1066 LFD a4, 11 * SIZE(AO7)
1068 FMADD y05, alpha7, a5, y05
1069 FMADD y06, alpha7, a6, y06
1070 FMADD y07, alpha7, a7, y07
1071 FMADD y08, alpha7, a8, y08
1073 LFD a5, 12 * SIZE(AO7)
1074 LFD a6, 13 * SIZE(AO7)
1075 LFD a7, 14 * SIZE(AO7)
1076 LFD a8, 15 * SIZE(AO7)
1078 FMADD y09, alpha7, a1, y09
1079 FMADD y10, alpha7, a2, y10
1080 FMADD y11, alpha7, a3, y11
1081 FMADD y12, alpha7, a4, y12
1083 LFD a1, 0 * SIZE(AO8)
1084 LFD a2, 1 * SIZE(AO8)
1085 LFD a3, 2 * SIZE(AO8)
1086 LFD a4, 3 * SIZE(AO8)
1088 FMADD y13, alpha7, a5, y13
1089 FMADD y14, alpha7, a6, y14
1090 FMADD y15, alpha7, a7, y15
1091 FMADD y16, alpha7, a8, y16
1093 LFD a5, 4 * SIZE(AO8)
1094 LFD a6, 5 * SIZE(AO8)
1095 LFD a7, 6 * SIZE(AO8)
1096 LFD a8, 7 * SIZE(AO8)
1098 FMADD y01, alpha8, a1, y01
1099 FMADD y02, alpha8, a2, y02
1100 FMADD y03, alpha8, a3, y03
1101 FMADD y04, alpha8, a4, y04
1103 LFD a1, 8 * SIZE(AO8)
1104 LFD a2, 9 * SIZE(AO8)
1105 LFD a3, 10 * SIZE(AO8)
1106 LFD a4, 11 * SIZE(AO8)
1108 FMADD y05, alpha8, a5, y05
1109 FMADD y06, alpha8, a6, y06
1110 FMADD y07, alpha8, a7, y07
1111 FMADD y08, alpha8, a8, y08
1113 LFD a5, 12 * SIZE(AO8)
1114 LFD a6, 13 * SIZE(AO8)
1115 LFD a7, 14 * SIZE(AO8)
1116 LFD a8, 15 * SIZE(AO8)
1118 addi AO5, AO5, 16 * SIZE
1119 addi AO6, AO6, 16 * SIZE
1120 addi AO7, AO7, 16 * SIZE
1121 addi AO8, AO8, 16 * SIZE
1128 FMADD y09, alpha8, a1, y09
1129 FMADD y10, alpha8, a2, y10
1130 FMADD y11, alpha8, a3, y11
1131 FMADD y12, alpha8, a4, y12
1133 LFD a1, 0 * SIZE(AO1)
1134 LFD a2, 1 * SIZE(AO1)
1135 LFD a3, 2 * SIZE(AO1)
1136 LFD a4, 3 * SIZE(AO1)
1138 FMADD y13, alpha8, a5, y13
1139 FMADD y14, alpha8, a6, y14
1140 FMADD y15, alpha8, a7, y15
1141 FMADD y16, alpha8, a8, y16
1143 LFD a5, 4 * SIZE(AO1)
1144 LFD a6, 5 * SIZE(AO1)
1145 LFD a7, 6 * SIZE(AO1)
1146 LFD a8, 7 * SIZE(AO1)
1148 STFD y01, 16 * SIZE(Y1)
1149 STFD y02, 17 * SIZE(Y1)
1150 STFD y03, 18 * SIZE(Y1)
1151 STFD y04, 19 * SIZE(Y1)
1153 LFD y01, 32 * SIZE(Y1)
1154 LFD y02, 33 * SIZE(Y1)
1155 LFD y03, 34 * SIZE(Y1)
1156 LFD y04, 35 * SIZE(Y1)
1159 addi Y1, Y1, 16 * SIZE
1164 STFD y05, 4 * SIZE(Y1)
1165 STFD y06, 5 * SIZE(Y1)
1166 STFD y07, 6 * SIZE(Y1)
1167 STFD y08, 7 * SIZE(Y1)
1169 STFD y09, 8 * SIZE(Y1)
1170 STFD y10, 9 * SIZE(Y1)
1171 STFD y11, 10 * SIZE(Y1)
1172 STFD y12, 11 * SIZE(Y1)
1174 STFD y13, 12 * SIZE(Y1)
1175 STFD y14, 13 * SIZE(Y1)
1176 STFD y15, 14 * SIZE(Y1)
1177 STFD y16, 15 * SIZE(Y1)
1179 addi Y1, Y1, 16 * SIZE
1189 LFD y01, 0 * SIZE(Y1)
1190 LFD y02, 1 * SIZE(Y1)
1191 LFD y03, 2 * SIZE(Y1)
1192 LFD y04, 3 * SIZE(Y1)
1194 LFD a1, 0 * SIZE(AO1)
1195 LFD a2, 1 * SIZE(AO1)
1196 LFD a3, 2 * SIZE(AO1)
1197 LFD a4, 3 * SIZE(AO1)
1199 LFD y05, 4 * SIZE(Y1)
1200 LFD y06, 5 * SIZE(Y1)
1201 LFD y07, 6 * SIZE(Y1)
1202 LFD y08, 7 * SIZE(Y1)
1204 LFD a5, 4 * SIZE(AO1)
1205 LFD a6, 5 * SIZE(AO1)
1206 LFD a7, 6 * SIZE(AO1)
1207 LFD a8, 7 * SIZE(AO1)
1209 FMADD y01, alpha1, a1, y01
1210 LFD a1, 0 * SIZE(AO2)
1211 FMADD y02, alpha1, a2, y02
1212 LFD a2, 1 * SIZE(AO2)
1213 FMADD y03, alpha1, a3, y03
1214 LFD a3, 2 * SIZE(AO2)
1215 FMADD y04, alpha1, a4, y04
1216 LFD a4, 3 * SIZE(AO2)
1218 FMADD y05, alpha1, a5, y05
1219 LFD a5, 4 * SIZE(AO2)
1220 FMADD y06, alpha1, a6, y06
1221 LFD a6, 5 * SIZE(AO2)
1222 FMADD y07, alpha1, a7, y07
1223 LFD a7, 6 * SIZE(AO2)
1224 FMADD y08, alpha1, a8, y08
1225 LFD a8, 7 * SIZE(AO2)
1227 FMADD y01, alpha2, a1, y01
1228 LFD a1, 0 * SIZE(AO3)
1229 FMADD y02, alpha2, a2, y02
1230 LFD a2, 1 * SIZE(AO3)
1232 FMADD y03, alpha2, a3, y03
1233 LFD a3, 2 * SIZE(AO3)
1234 FMADD y04, alpha2, a4, y04
1235 LFD a4, 3 * SIZE(AO3)
1237 FMADD y05, alpha2, a5, y05
1238 LFD a5, 4 * SIZE(AO3)
1239 FMADD y06, alpha2, a6, y06
1240 LFD a6, 5 * SIZE(AO3)
1242 FMADD y07, alpha2, a7, y07
1243 LFD a7, 6 * SIZE(AO3)
1244 FMADD y08, alpha2, a8, y08
1245 LFD a8, 7 * SIZE(AO3)
1247 FMADD y01, alpha3, a1, y01
1248 LFD a1, 0 * SIZE(AO4)
1249 FMADD y02, alpha3, a2, y02
1250 LFD a2, 1 * SIZE(AO4)
1252 FMADD y03, alpha3, a3, y03
1253 LFD a3, 2 * SIZE(AO4)
1254 FMADD y04, alpha3, a4, y04
1255 LFD a4, 3 * SIZE(AO4)
1257 FMADD y05, alpha3, a5, y05
1258 LFD a5, 4 * SIZE(AO4)
1259 FMADD y06, alpha3, a6, y06
1260 LFD a6, 5 * SIZE(AO4)
1262 FMADD y07, alpha3, a7, y07
1263 LFD a7, 6 * SIZE(AO4)
1264 FMADD y08, alpha3, a8, y08
1265 LFD a8, 7 * SIZE(AO4)
1267 FMADD y01, alpha4, a1, y01
1268 LFD a1, 0 * SIZE(AO5)
1269 FMADD y02, alpha4, a2, y02
1270 LFD a2, 1 * SIZE(AO5)
1272 FMADD y03, alpha4, a3, y03
1273 LFD a3, 2 * SIZE(AO5)
1274 FMADD y04, alpha4, a4, y04
1275 LFD a4, 3 * SIZE(AO5)
1277 FMADD y05, alpha4, a5, y05
1278 LFD a5, 4 * SIZE(AO5)
1279 FMADD y06, alpha4, a6, y06
1280 LFD a6, 5 * SIZE(AO5)
1282 FMADD y07, alpha4, a7, y07
1283 LFD a7, 6 * SIZE(AO5)
1284 FMADD y08, alpha4, a8, y08
1285 LFD a8, 7 * SIZE(AO5)
1287 FMADD y01, alpha5, a1, y01
1288 LFD a1, 0 * SIZE(AO6)
1289 FMADD y02, alpha5, a2, y02
1290 LFD a2, 1 * SIZE(AO6)
1292 FMADD y03, alpha5, a3, y03
1293 LFD a3, 2 * SIZE(AO6)
1294 FMADD y04, alpha5, a4, y04
1295 LFD a4, 3 * SIZE(AO6)
1297 FMADD y05, alpha5, a5, y05
1298 LFD a5, 4 * SIZE(AO6)
1299 FMADD y06, alpha5, a6, y06
1300 LFD a6, 5 * SIZE(AO6)
1302 FMADD y07, alpha5, a7, y07
1303 LFD a7, 6 * SIZE(AO6)
1304 FMADD y08, alpha5, a8, y08
1305 LFD a8, 7 * SIZE(AO6)
1307 FMADD y01, alpha6, a1, y01
1308 LFD a1, 0 * SIZE(AO7)
1309 FMADD y02, alpha6, a2, y02
1310 LFD a2, 1 * SIZE(AO7)
1312 FMADD y03, alpha6, a3, y03
1313 LFD a3, 2 * SIZE(AO7)
1314 FMADD y04, alpha6, a4, y04
1315 LFD a4, 3 * SIZE(AO7)
1317 FMADD y05, alpha6, a5, y05
1318 LFD a5, 4 * SIZE(AO7)
1319 FMADD y06, alpha6, a6, y06
1320 LFD a6, 5 * SIZE(AO7)
1322 FMADD y07, alpha6, a7, y07
1323 LFD a7, 6 * SIZE(AO7)
1324 FMADD y08, alpha6, a8, y08
1325 LFD a8, 7 * SIZE(AO7)
1327 FMADD y01, alpha7, a1, y01
1328 LFD a1, 0 * SIZE(AO8)
1329 FMADD y02, alpha7, a2, y02
1330 LFD a2, 1 * SIZE(AO8)
1332 FMADD y03, alpha7, a3, y03
1333 LFD a3, 2 * SIZE(AO8)
1334 FMADD y04, alpha7, a4, y04
1335 LFD a4, 3 * SIZE(AO8)
1337 FMADD y05, alpha7, a5, y05
1338 LFD a5, 4 * SIZE(AO8)
1339 FMADD y06, alpha7, a6, y06
1340 LFD a6, 5 * SIZE(AO8)
1342 FMADD y07, alpha7, a7, y07
1343 LFD a7, 6 * SIZE(AO8)
1344 FMADD y08, alpha7, a8, y08
1345 LFD a8, 7 * SIZE(AO8)
1347 FMADD y01, alpha8, a1, y01
1348 addi AO1, AO1, 8 * SIZE
1349 FMADD y02, alpha8, a2, y02
1350 addi AO2, AO2, 8 * SIZE
1351 FMADD y03, alpha8, a3, y03
1352 addi AO3, AO3, 8 * SIZE
1353 FMADD y04, alpha8, a4, y04
1354 addi AO4, AO4, 8 * SIZE
1356 STFD y01, 0 * SIZE(Y1)
1357 STFD y02, 1 * SIZE(Y1)
1358 STFD y03, 2 * SIZE(Y1)
1359 STFD y04, 3 * SIZE(Y1)
1361 FMADD y05, alpha8, a5, y05
1362 addi AO5, AO5, 8 * SIZE
1363 FMADD y06, alpha8, a6, y06
1364 addi AO6, AO6, 8 * SIZE
1365 FMADD y07, alpha8, a7, y07
1366 addi AO7, AO7, 8 * SIZE
1367 FMADD y08, alpha8, a8, y08
1368 addi AO8, AO8, 8 * SIZE
1370 STFD y05, 4 * SIZE(Y1)
1371 STFD y06, 5 * SIZE(Y1)
1372 STFD y07, 6 * SIZE(Y1)
1373 STFD y08, 7 * SIZE(Y1)
1374 addi Y1, Y1, 8 * SIZE
1381 LFD y01, 0 * SIZE(Y1)
1382 LFD y02, 1 * SIZE(Y1)
1383 LFD y03, 2 * SIZE(Y1)
1384 LFD y04, 3 * SIZE(Y1)
1386 LFD a1, 0 * SIZE(AO1)
1387 LFD a2, 1 * SIZE(AO1)
1388 LFD a3, 2 * SIZE(AO1)
1389 LFD a4, 3 * SIZE(AO1)
1391 LFD a5, 0 * SIZE(AO2)
1392 LFD a6, 1 * SIZE(AO2)
1393 LFD a7, 2 * SIZE(AO2)
1394 LFD a8, 3 * SIZE(AO2)
1396 FMADD y01, alpha1, a1, y01
1397 LFD a1, 0 * SIZE(AO3)
1398 FMADD y02, alpha1, a2, y02
1399 LFD a2, 1 * SIZE(AO3)
1400 FMADD y03, alpha1, a3, y03
1401 LFD a3, 2 * SIZE(AO3)
1402 FMADD y04, alpha1, a4, y04
1403 LFD a4, 3 * SIZE(AO3)
1405 FMADD y01, alpha2, a5, y01
1406 LFD a5, 0 * SIZE(AO4)
1407 FMADD y02, alpha2, a6, y02
1408 LFD a6, 1 * SIZE(AO4)
1409 FMADD y03, alpha2, a7, y03
1410 LFD a7, 2 * SIZE(AO4)
1411 FMADD y04, alpha2, a8, y04
1412 LFD a8, 3 * SIZE(AO4)
1414 FMADD y01, alpha3, a1, y01
1415 LFD a1, 0 * SIZE(AO5)
1416 FMADD y02, alpha3, a2, y02
1417 LFD a2, 1 * SIZE(AO5)
1418 FMADD y03, alpha3, a3, y03
1419 LFD a3, 2 * SIZE(AO5)
1420 FMADD y04, alpha3, a4, y04
1421 LFD a4, 3 * SIZE(AO5)
1423 FMADD y01, alpha4, a5, y01
1424 LFD a5, 0 * SIZE(AO6)
1425 FMADD y02, alpha4, a6, y02
1426 LFD a6, 1 * SIZE(AO6)
1427 FMADD y03, alpha4, a7, y03
1428 LFD a7, 2 * SIZE(AO6)
1429 FMADD y04, alpha4, a8, y04
1430 LFD a8, 3 * SIZE(AO6)
1432 FMADD y01, alpha5, a1, y01
1433 LFD a1, 0 * SIZE(AO7)
1434 FMADD y02, alpha5, a2, y02
1435 LFD a2, 1 * SIZE(AO7)
1436 FMADD y03, alpha5, a3, y03
1437 LFD a3, 2 * SIZE(AO7)
1438 FMADD y04, alpha5, a4, y04
1439 LFD a4, 3 * SIZE(AO7)
1441 FMADD y01, alpha6, a5, y01
1442 LFD a5, 0 * SIZE(AO8)
1443 FMADD y02, alpha6, a6, y02
1444 LFD a6, 1 * SIZE(AO8)
1445 FMADD y03, alpha6, a7, y03
1446 LFD a7, 2 * SIZE(AO8)
1447 FMADD y04, alpha6, a8, y04
1448 LFD a8, 3 * SIZE(AO8)
1450 FMADD y01, alpha7, a1, y01
1451 addi AO1, AO1, 4 * SIZE
1452 FMADD y02, alpha7, a2, y02
1453 addi AO2, AO2, 4 * SIZE
1454 FMADD y03, alpha7, a3, y03
1455 addi AO3, AO3, 4 * SIZE
1456 FMADD y04, alpha7, a4, y04
1457 addi AO4, AO4, 4 * SIZE
1459 FMADD y01, alpha8, a5, y01
1460 addi AO5, AO5, 4 * SIZE
1461 FMADD y02, alpha8, a6, y02
1462 addi AO6, AO6, 4 * SIZE
1463 FMADD y03, alpha8, a7, y03
1464 addi AO7, AO7, 4 * SIZE
1465 FMADD y04, alpha8, a8, y04
1466 addi AO8, AO8, 4 * SIZE
1468 STFD y01, 0 * SIZE(Y1)
1469 STFD y02, 1 * SIZE(Y1)
1470 STFD y03, 2 * SIZE(Y1)
1471 STFD y04, 3 * SIZE(Y1)
1472 addi Y1, Y1, 4 * SIZE
1479 LFD y01, 0 * SIZE(Y1)
1480 LFD y02, 1 * SIZE(Y1)
1482 LFD a1, 0 * SIZE(AO1)
1483 LFD a2, 1 * SIZE(AO1)
1484 LFD a3, 0 * SIZE(AO2)
1485 LFD a4, 1 * SIZE(AO2)
1487 LFD a5, 0 * SIZE(AO3)
1488 LFD a6, 1 * SIZE(AO3)
1489 LFD a7, 0 * SIZE(AO4)
1490 LFD a8, 1 * SIZE(AO4)
1492 FMADD y01, alpha1, a1, y01
1493 LFD a1, 0 * SIZE(AO5)
1494 FMADD y02, alpha1, a2, y02
1495 LFD a2, 1 * SIZE(AO5)
1496 FMADD y01, alpha2, a3, y01
1497 LFD a3, 0 * SIZE(AO6)
1498 FMADD y02, alpha2, a4, y02
1499 LFD a4, 1 * SIZE(AO6)
1501 FMADD y01, alpha3, a5, y01
1502 LFD a5, 0 * SIZE(AO7)
1503 FMADD y02, alpha3, a6, y02
1504 LFD a6, 1 * SIZE(AO7)
1505 FMADD y01, alpha4, a7, y01
1506 LFD a7, 0 * SIZE(AO8)
1507 FMADD y02, alpha4, a8, y02
1508 LFD a8, 1 * SIZE(AO8)
1510 FMADD y01, alpha5, a1, y01
1511 addi AO1, AO1, 2 * SIZE
1512 FMADD y02, alpha5, a2, y02
1513 addi AO2, AO2, 2 * SIZE
1514 FMADD y01, alpha6, a3, y01
1515 addi AO3, AO3, 2 * SIZE
1516 FMADD y02, alpha6, a4, y02
1517 addi AO4, AO4, 2 * SIZE
1519 FMADD y01, alpha7, a5, y01
1520 addi AO5, AO5, 2 * SIZE
1521 FMADD y02, alpha7, a6, y02
1522 addi AO6, AO6, 2 * SIZE
1523 FMADD y01, alpha8, a7, y01
1524 addi AO7, AO7, 2 * SIZE
1525 FMADD y02, alpha8, a8, y02
1526 addi AO8, AO8, 2 * SIZE
1528 STFD y01, 0 * SIZE(Y1)
1529 STFD y02, 1 * SIZE(Y1)
1530 addi Y1, Y1, 2 * SIZE
1537 LFD y01, 0 * SIZE(Y1)
1539 LFD a1, 0 * SIZE(AO1)
1540 LFD a2, 0 * SIZE(AO2)
1541 LFD a3, 0 * SIZE(AO3)
1542 LFD a4, 0 * SIZE(AO4)
1543 LFD a5, 0 * SIZE(AO5)
1544 LFD a6, 0 * SIZE(AO6)
1545 LFD a7, 0 * SIZE(AO7)
1546 LFD a8, 0 * SIZE(AO8)
1548 FMADD y01, alpha1, a1, y01
1549 FMADD y01, alpha2, a2, y01
1550 FMADD y01, alpha3, a3, y01
1551 FMADD y01, alpha4, a4, y01
1553 FMADD y01, alpha5, a5, y01
1554 FMADD y01, alpha6, a6, y01
1555 FMADD y01, alpha7, a7, y01
1556 FMADD y01, alpha8, a8, y01
1558 STFD y01, 0 * SIZE(Y1)
1575 LFD alpha1, 0 * SIZE(X)
1577 LFD alpha2, 0 * SIZE(X)
1579 LFD alpha3, 0 * SIZE(X)
1581 LFD alpha4, 0 * SIZE(X)
1584 FMUL alpha1, alpha, alpha1
1586 FMUL alpha2, alpha, alpha2
1588 FMUL alpha3, alpha, alpha3
1590 FMUL alpha4, alpha, alpha4
1597 LFD y01, 0 * SIZE(Y1)
1598 LFD y02, 1 * SIZE(Y1)
1599 LFD y03, 2 * SIZE(Y1)
1600 LFD y04, 3 * SIZE(Y1)
1601 LFD y05, 4 * SIZE(Y1)
1602 LFD y06, 5 * SIZE(Y1)
1603 LFD y07, 6 * SIZE(Y1)
1604 LFD y08, 7 * SIZE(Y1)
1605 LFD y09, 8 * SIZE(Y1)
1606 LFD y10, 9 * SIZE(Y1)
1607 LFD y11, 10 * SIZE(Y1)
1608 LFD y12, 11 * SIZE(Y1)
1609 LFD y13, 12 * SIZE(Y1)
1610 LFD y14, 13 * SIZE(Y1)
1611 LFD y15, 14 * SIZE(Y1)
1612 LFD y16, 15 * SIZE(Y1)
1614 LFD a1, 0 * SIZE(AO1)
1615 LFD a2, 1 * SIZE(AO1)
1616 LFD a3, 2 * SIZE(AO1)
1617 LFD a4, 3 * SIZE(AO1)
1618 LFD a5, 4 * SIZE(AO1)
1619 LFD a6, 5 * SIZE(AO1)
1620 LFD a7, 6 * SIZE(AO1)
1621 LFD a8, 7 * SIZE(AO1)
1626 FMADD y01, alpha1, a1, y01
1627 LFD a1, 8 * SIZE(AO1)
1628 FMADD y02, alpha1, a2, y02
1629 LFD a2, 9 * SIZE(AO1)
1630 FMADD y03, alpha1, a3, y03
1631 LFD a3, 10 * SIZE(AO1)
1632 FMADD y04, alpha1, a4, y04
1633 LFD a4, 11 * SIZE(AO1)
1635 FMADD y05, alpha1, a5, y05
1636 LFD a5, 12 * SIZE(AO1)
1637 FMADD y06, alpha1, a6, y06
1638 LFD a6, 13 * SIZE(AO1)
1639 FMADD y07, alpha1, a7, y07
1640 LFD a7, 14 * SIZE(AO1)
1641 FMADD y08, alpha1, a8, y08
1642 LFD a8, 15 * SIZE(AO1)
1644 FMADD y09, alpha1, a1, y09
1645 LFD a1, 0 * SIZE(AO2)
1646 FMADD y10, alpha1, a2, y10
1647 LFD a2, 1 * SIZE(AO2)
1648 FMADD y11, alpha1, a3, y11
1649 LFD a3, 2 * SIZE(AO2)
1650 FMADD y12, alpha1, a4, y12
1651 LFD a4, 3 * SIZE(AO2)
1653 FMADD y13, alpha1, a5, y13
1654 LFD a5, 4 * SIZE(AO2)
1655 FMADD y14, alpha1, a6, y14
1656 LFD a6, 5 * SIZE(AO2)
1657 FMADD y15, alpha1, a7, y15
1658 LFD a7, 6 * SIZE(AO2)
1659 FMADD y16, alpha1, a8, y16
1660 LFD a8, 7 * SIZE(AO2)
1662 FMADD y01, alpha2, a1, y01
1663 LFD a1, 8 * SIZE(AO2)
1664 FMADD y02, alpha2, a2, y02
1665 LFD a2, 9 * SIZE(AO2)
1666 FMADD y03, alpha2, a3, y03
1667 LFD a3, 10 * SIZE(AO2)
1668 FMADD y04, alpha2, a4, y04
1669 LFD a4, 11 * SIZE(AO2)
1671 FMADD y05, alpha2, a5, y05
1672 LFD a5, 12 * SIZE(AO2)
1673 FMADD y06, alpha2, a6, y06
1674 LFD a6, 13 * SIZE(AO2)
1675 FMADD y07, alpha2, a7, y07
1676 LFD a7, 14 * SIZE(AO2)
1677 FMADD y08, alpha2, a8, y08
1678 LFD a8, 15 * SIZE(AO2)
1680 addi AO1, AO1, 16 * SIZE
1681 addi AO2, AO2, 16 * SIZE
1685 FMADD y09, alpha2, a1, y09
1686 LFD a1, 0 * SIZE(AO3)
1687 FMADD y10, alpha2, a2, y10
1688 LFD a2, 1 * SIZE(AO3)
1689 FMADD y11, alpha2, a3, y11
1690 LFD a3, 2 * SIZE(AO3)
1691 FMADD y12, alpha2, a4, y12
1692 LFD a4, 3 * SIZE(AO3)
1694 FMADD y13, alpha2, a5, y13
1695 LFD a5, 4 * SIZE(AO3)
1696 FMADD y14, alpha2, a6, y14
1697 LFD a6, 5 * SIZE(AO3)
1698 FMADD y15, alpha2, a7, y15
1699 LFD a7, 6 * SIZE(AO3)
1700 FMADD y16, alpha2, a8, y16
1701 LFD a8, 7 * SIZE(AO3)
1703 FMADD y01, alpha3, a1, y01
1704 LFD a1, 8 * SIZE(AO3)
1705 FMADD y02, alpha3, a2, y02
1706 LFD a2, 9 * SIZE(AO3)
1707 FMADD y03, alpha3, a3, y03
1708 LFD a3, 10 * SIZE(AO3)
1709 FMADD y04, alpha3, a4, y04
1710 LFD a4, 11 * SIZE(AO3)
1712 FMADD y05, alpha3, a5, y05
1713 LFD a5, 12 * SIZE(AO3)
1714 FMADD y06, alpha3, a6, y06
1715 LFD a6, 13 * SIZE(AO3)
1716 FMADD y07, alpha3, a7, y07
1717 LFD a7, 14 * SIZE(AO3)
1718 FMADD y08, alpha3, a8, y08
1719 LFD a8, 15 * SIZE(AO3)
1721 FMADD y09, alpha3, a1, y09
1722 LFD a1, 0 * SIZE(AO4)
1723 FMADD y10, alpha3, a2, y10
1724 LFD a2, 1 * SIZE(AO4)
1725 FMADD y11, alpha3, a3, y11
1726 LFD a3, 2 * SIZE(AO4)
1727 FMADD y12, alpha3, a4, y12
1728 LFD a4, 3 * SIZE(AO4)
1730 FMADD y13, alpha3, a5, y13
1731 LFD a5, 4 * SIZE(AO4)
1732 FMADD y14, alpha3, a6, y14
1733 LFD a6, 5 * SIZE(AO4)
1734 FMADD y15, alpha3, a7, y15
1735 LFD a7, 6 * SIZE(AO4)
1736 FMADD y16, alpha3, a8, y16
1737 LFD a8, 7 * SIZE(AO4)
1739 FMADD y01, alpha4, a1, y01
1740 LFD a1, 8 * SIZE(AO4)
1741 FMADD y02, alpha4, a2, y02
1742 LFD a2, 9 * SIZE(AO4)
1743 FMADD y03, alpha4, a3, y03
1744 LFD a3, 10 * SIZE(AO4)
1745 FMADD y04, alpha4, a4, y04
1746 LFD a4, 11 * SIZE(AO4)
1748 STFD y01, 0 * SIZE(Y1)
1749 STFD y02, 1 * SIZE(Y1)
1750 STFD y03, 2 * SIZE(Y1)
1751 STFD y04, 3 * SIZE(Y1)
1753 LFD y01, 16 * SIZE(Y1)
1754 LFD y02, 17 * SIZE(Y1)
1755 LFD y03, 18 * SIZE(Y1)
1756 LFD y04, 19 * SIZE(Y1)
1758 FMADD y05, alpha4, a5, y05
1759 LFD a5, 12 * SIZE(AO4)
1760 FMADD y06, alpha4, a6, y06
1761 LFD a6, 13 * SIZE(AO4)
1762 FMADD y07, alpha4, a7, y07
1763 LFD a7, 14 * SIZE(AO4)
1764 FMADD y08, alpha4, a8, y08
1765 LFD a8, 15 * SIZE(AO4)
1767 STFD y05, 4 * SIZE(Y1)
1768 STFD y06, 5 * SIZE(Y1)
1769 STFD y07, 6 * SIZE(Y1)
1770 STFD y08, 7 * SIZE(Y1)
1772 LFD y05, 20 * SIZE(Y1)
1773 LFD y06, 21 * SIZE(Y1)
1774 LFD y07, 22 * SIZE(Y1)
1775 LFD y08, 23 * SIZE(Y1)
1777 addi AO3, AO3, 16 * SIZE
1778 addi AO4, AO4, 16 * SIZE
1782 FMADD y09, alpha4, a1, y09
1783 LFD a1, 0 * SIZE(AO1)
1784 FMADD y10, alpha4, a2, y10
1785 LFD a2, 1 * SIZE(AO1)
1786 FMADD y11, alpha4, a3, y11
1787 LFD a3, 2 * SIZE(AO1)
1788 FMADD y12, alpha4, a4, y12
1789 LFD a4, 3 * SIZE(AO1)
1791 STFD y09, 8 * SIZE(Y1)
1792 STFD y10, 9 * SIZE(Y1)
1793 STFD y11, 10 * SIZE(Y1)
1794 STFD y12, 11 * SIZE(Y1)
1796 LFD y09, 24 * SIZE(Y1)
1797 LFD y10, 25 * SIZE(Y1)
1798 LFD y11, 26 * SIZE(Y1)
1799 LFD y12, 27 * SIZE(Y1)
1801 FMADD y13, alpha4, a5, y13
1802 LFD a5, 4 * SIZE(AO1)
1803 FMADD y14, alpha4, a6, y14
1804 LFD a6, 5 * SIZE(AO1)
1805 FMADD y15, alpha4, a7, y15
1806 LFD a7, 6 * SIZE(AO1)
1807 FMADD y16, alpha4, a8, y16
1808 LFD a8, 7 * SIZE(AO1)
1810 STFD y13, 12 * SIZE(Y1)
1811 STFD y14, 13 * SIZE(Y1)
1812 STFD y15, 14 * SIZE(Y1)
1813 STFD y16, 15 * SIZE(Y1)
1815 LFD y13, 28 * SIZE(Y1)
1816 LFD y14, 29 * SIZE(Y1)
1817 LFD y15, 30 * SIZE(Y1)
1818 LFD y16, 31 * SIZE(Y1)
1820 addi Y1, Y1, 16 * SIZE
1826 FMADD y01, alpha1, a1, y01
1827 LFD a1, 8 * SIZE(AO1)
1828 FMADD y02, alpha1, a2, y02
1829 LFD a2, 9 * SIZE(AO1)
1830 FMADD y03, alpha1, a3, y03
1831 LFD a3, 10 * SIZE(AO1)
1832 FMADD y04, alpha1, a4, y04
1833 LFD a4, 11 * SIZE(AO1)
1835 FMADD y05, alpha1, a5, y05
1836 LFD a5, 12 * SIZE(AO1)
1837 FMADD y06, alpha1, a6, y06
1838 LFD a6, 13 * SIZE(AO1)
1839 FMADD y07, alpha1, a7, y07
1840 LFD a7, 14 * SIZE(AO1)
1841 FMADD y08, alpha1, a8, y08
1842 LFD a8, 15 * SIZE(AO1)
1844 FMADD y09, alpha1, a1, y09
1845 LFD a1, 0 * SIZE(AO2)
1846 FMADD y10, alpha1, a2, y10
1847 LFD a2, 1 * SIZE(AO2)
1848 FMADD y11, alpha1, a3, y11
1849 LFD a3, 2 * SIZE(AO2)
1850 FMADD y12, alpha1, a4, y12
1851 LFD a4, 3 * SIZE(AO2)
1853 FMADD y13, alpha1, a5, y13
1854 LFD a5, 4 * SIZE(AO2)
1855 FMADD y14, alpha1, a6, y14
1856 LFD a6, 5 * SIZE(AO2)
1857 FMADD y15, alpha1, a7, y15
1858 LFD a7, 6 * SIZE(AO2)
1859 FMADD y16, alpha1, a8, y16
1860 LFD a8, 7 * SIZE(AO2)
1862 FMADD y01, alpha2, a1, y01
1863 LFD a1, 8 * SIZE(AO2)
1864 FMADD y02, alpha2, a2, y02
1865 LFD a2, 9 * SIZE(AO2)
1866 FMADD y03, alpha2, a3, y03
1867 LFD a3, 10 * SIZE(AO2)
1868 FMADD y04, alpha2, a4, y04
1869 LFD a4, 11 * SIZE(AO2)
1871 FMADD y05, alpha2, a5, y05
1872 LFD a5, 12 * SIZE(AO2)
1873 FMADD y06, alpha2, a6, y06
1874 LFD a6, 13 * SIZE(AO2)
1875 FMADD y07, alpha2, a7, y07
1876 LFD a7, 14 * SIZE(AO2)
1877 FMADD y08, alpha2, a8, y08
1878 LFD a8, 15 * SIZE(AO2)
1880 FMADD y09, alpha2, a1, y09
1881 LFD a1, 0 * SIZE(AO3)
1882 FMADD y10, alpha2, a2, y10
1883 LFD a2, 1 * SIZE(AO3)
1884 FMADD y11, alpha2, a3, y11
1885 LFD a3, 2 * SIZE(AO3)
1886 FMADD y12, alpha2, a4, y12
1887 LFD a4, 3 * SIZE(AO3)
1889 FMADD y13, alpha2, a5, y13
1890 LFD a5, 4 * SIZE(AO3)
1891 FMADD y14, alpha2, a6, y14
1892 LFD a6, 5 * SIZE(AO3)
1893 FMADD y15, alpha2, a7, y15
1894 LFD a7, 6 * SIZE(AO3)
1895 FMADD y16, alpha2, a8, y16
1896 LFD a8, 7 * SIZE(AO3)
1898 FMADD y01, alpha3, a1, y01
1899 LFD a1, 8 * SIZE(AO3)
1900 FMADD y02, alpha3, a2, y02
1901 LFD a2, 9 * SIZE(AO3)
1902 FMADD y03, alpha3, a3, y03
1903 LFD a3, 10 * SIZE(AO3)
1904 FMADD y04, alpha3, a4, y04
1905 LFD a4, 11 * SIZE(AO3)
1907 FMADD y05, alpha3, a5, y05
1908 LFD a5, 12 * SIZE(AO3)
1909 FMADD y06, alpha3, a6, y06
1910 LFD a6, 13 * SIZE(AO3)
1911 FMADD y07, alpha3, a7, y07
1912 LFD a7, 14 * SIZE(AO3)
1913 FMADD y08, alpha3, a8, y08
1914 LFD a8, 15 * SIZE(AO3)
1916 FMADD y09, alpha3, a1, y09
1917 LFD a1, 0 * SIZE(AO4)
1918 FMADD y10, alpha3, a2, y10
1919 LFD a2, 1 * SIZE(AO4)
1920 FMADD y11, alpha3, a3, y11
1921 LFD a3, 2 * SIZE(AO4)
1922 FMADD y12, alpha3, a4, y12
1923 LFD a4, 3 * SIZE(AO4)
1925 FMADD y13, alpha3, a5, y13
1926 LFD a5, 4 * SIZE(AO4)
1927 FMADD y14, alpha3, a6, y14
1928 LFD a6, 5 * SIZE(AO4)
1929 FMADD y15, alpha3, a7, y15
1930 LFD a7, 6 * SIZE(AO4)
1931 FMADD y16, alpha3, a8, y16
1932 LFD a8, 7 * SIZE(AO4)
1934 FMADD y01, alpha4, a1, y01
1935 LFD a1, 8 * SIZE(AO4)
1936 FMADD y02, alpha4, a2, y02
1937 LFD a2, 9 * SIZE(AO4)
1938 FMADD y03, alpha4, a3, y03
1939 LFD a3, 10 * SIZE(AO4)
1940 FMADD y04, alpha4, a4, y04
1941 LFD a4, 11 * SIZE(AO4)
1943 FMADD y05, alpha4, a5, y05
1944 LFD a5, 12 * SIZE(AO4)
1945 FMADD y06, alpha4, a6, y06
1946 LFD a6, 13 * SIZE(AO4)
1947 FMADD y07, alpha4, a7, y07
1948 LFD a7, 14 * SIZE(AO4)
1949 FMADD y08, alpha4, a8, y08
1950 LFD a8, 15 * SIZE(AO4)
1952 FMADD y09, alpha4, a1, y09
1953 addi AO1, AO1, 16 * SIZE
1954 FMADD y10, alpha4, a2, y10
1955 addi AO2, AO2, 16 * SIZE
1956 FMADD y11, alpha4, a3, y11
1957 addi AO3, AO3, 16 * SIZE
1958 FMADD y12, alpha4, a4, y12
1959 addi AO4, AO4, 16 * SIZE
1961 FMADD y13, alpha4, a5, y13
1962 FMADD y14, alpha4, a6, y14
1963 FMADD y15, alpha4, a7, y15
1964 FMADD y16, alpha4, a8, y16
1966 STFD y01, 0 * SIZE(Y1)
1967 STFD y02, 1 * SIZE(Y1)
1968 STFD y03, 2 * SIZE(Y1)
1969 STFD y04, 3 * SIZE(Y1)
1970 STFD y05, 4 * SIZE(Y1)
1971 STFD y06, 5 * SIZE(Y1)
1972 STFD y07, 6 * SIZE(Y1)
1973 STFD y08, 7 * SIZE(Y1)
1974 STFD y09, 8 * SIZE(Y1)
1975 STFD y10, 9 * SIZE(Y1)
1976 STFD y11, 10 * SIZE(Y1)
1977 STFD y12, 11 * SIZE(Y1)
1978 STFD y13, 12 * SIZE(Y1)
1979 STFD y14, 13 * SIZE(Y1)
1980 STFD y15, 14 * SIZE(Y1)
1981 STFD y16, 15 * SIZE(Y1)
1982 addi Y1, Y1, 16 * SIZE
1992 LFD y01, 0 * SIZE(Y1)
1993 LFD y02, 1 * SIZE(Y1)
1994 LFD y03, 2 * SIZE(Y1)
1995 LFD y04, 3 * SIZE(Y1)
1996 LFD y05, 4 * SIZE(Y1)
1997 LFD y06, 5 * SIZE(Y1)
1998 LFD y07, 6 * SIZE(Y1)
1999 LFD y08, 7 * SIZE(Y1)
2001 LFD a1, 0 * SIZE(AO1)
2002 LFD a2, 1 * SIZE(AO1)
2003 LFD a3, 2 * SIZE(AO1)
2004 LFD a4, 3 * SIZE(AO1)
2005 LFD a5, 4 * SIZE(AO1)
2006 LFD a6, 5 * SIZE(AO1)
2007 LFD a7, 6 * SIZE(AO1)
2008 LFD a8, 7 * SIZE(AO1)
2010 FMADD y01, alpha1, a1, y01
2011 LFD a1, 0 * SIZE(AO2)
2012 FMADD y02, alpha1, a2, y02
2013 LFD a2, 1 * SIZE(AO2)
2014 FMADD y03, alpha1, a3, y03
2015 LFD a3, 2 * SIZE(AO2)
2016 FMADD y04, alpha1, a4, y04
2017 LFD a4, 3 * SIZE(AO2)
2019 FMADD y05, alpha1, a5, y05
2020 LFD a5, 4 * SIZE(AO2)
2021 FMADD y06, alpha1, a6, y06
2022 LFD a6, 5 * SIZE(AO2)
2023 FMADD y07, alpha1, a7, y07
2024 LFD a7, 6 * SIZE(AO2)
2025 FMADD y08, alpha1, a8, y08
2026 LFD a8, 7 * SIZE(AO2)
2028 FMADD y01, alpha2, a1, y01
2029 LFD a1, 0 * SIZE(AO3)
2030 FMADD y02, alpha2, a2, y02
2031 LFD a2, 1 * SIZE(AO3)
2032 FMADD y03, alpha2, a3, y03
2033 LFD a3, 2 * SIZE(AO3)
2034 FMADD y04, alpha2, a4, y04
2035 LFD a4, 3 * SIZE(AO3)
2036 FMADD y05, alpha2, a5, y05
2037 LFD a5, 4 * SIZE(AO3)
2038 FMADD y06, alpha2, a6, y06
2039 LFD a6, 5 * SIZE(AO3)
2040 FMADD y07, alpha2, a7, y07
2041 LFD a7, 6 * SIZE(AO3)
2042 FMADD y08, alpha2, a8, y08
2043 LFD a8, 7 * SIZE(AO3)
2045 FMADD y01, alpha3, a1, y01
2046 LFD a1, 0 * SIZE(AO4)
2047 FMADD y02, alpha3, a2, y02
2048 LFD a2, 1 * SIZE(AO4)
2049 FMADD y03, alpha3, a3, y03
2050 LFD a3, 2 * SIZE(AO4)
2051 FMADD y04, alpha3, a4, y04
2052 LFD a4, 3 * SIZE(AO4)
2054 FMADD y05, alpha3, a5, y05
2055 LFD a5, 4 * SIZE(AO4)
2056 FMADD y06, alpha3, a6, y06
2057 LFD a6, 5 * SIZE(AO4)
2058 FMADD y07, alpha3, a7, y07
2059 LFD a7, 6 * SIZE(AO4)
2060 FMADD y08, alpha3, a8, y08
2061 LFD a8, 7 * SIZE(AO4)
2063 FMADD y01, alpha4, a1, y01
2064 addi AO1, AO1, 8 * SIZE
2065 FMADD y02, alpha4, a2, y02
2066 addi AO2, AO2, 8 * SIZE
2067 FMADD y03, alpha4, a3, y03
2068 addi AO3, AO3, 8 * SIZE
2069 FMADD y04, alpha4, a4, y04
2070 addi AO4, AO4, 8 * SIZE
2072 STFD y01, 0 * SIZE(Y1)
2073 STFD y02, 1 * SIZE(Y1)
2074 STFD y03, 2 * SIZE(Y1)
2075 STFD y04, 3 * SIZE(Y1)
2077 FMADD y05, alpha4, a5, y05
2078 FMADD y06, alpha4, a6, y06
2079 FMADD y07, alpha4, a7, y07
2080 FMADD y08, alpha4, a8, y08
2082 STFD y05, 4 * SIZE(Y1)
2083 STFD y06, 5 * SIZE(Y1)
2084 STFD y07, 6 * SIZE(Y1)
2085 STFD y08, 7 * SIZE(Y1)
2086 addi Y1, Y1, 8 * SIZE
2093 LFD y01, 0 * SIZE(Y1)
2094 LFD y02, 1 * SIZE(Y1)
2095 LFD y03, 2 * SIZE(Y1)
2096 LFD y04, 3 * SIZE(Y1)
2098 LFD a1, 0 * SIZE(AO1)
2099 LFD a2, 1 * SIZE(AO1)
2100 LFD a3, 2 * SIZE(AO1)
2101 LFD a4, 3 * SIZE(AO1)
2103 LFD a5, 0 * SIZE(AO2)
2104 LFD a6, 1 * SIZE(AO2)
2105 LFD a7, 2 * SIZE(AO2)
2106 LFD a8, 3 * SIZE(AO2)
2108 FMADD y01, alpha1, a1, y01
2109 LFD a1, 0 * SIZE(AO3)
2110 FMADD y02, alpha1, a2, y02
2111 LFD a2, 1 * SIZE(AO3)
2112 FMADD y03, alpha1, a3, y03
2113 LFD a3, 2 * SIZE(AO3)
2114 FMADD y04, alpha1, a4, y04
2115 LFD a4, 3 * SIZE(AO3)
2117 FMADD y01, alpha2, a5, y01
2118 LFD a5, 0 * SIZE(AO4)
2119 FMADD y02, alpha2, a6, y02
2120 LFD a6, 1 * SIZE(AO4)
2121 FMADD y03, alpha2, a7, y03
2122 LFD a7, 2 * SIZE(AO4)
2123 FMADD y04, alpha2, a8, y04
2124 LFD a8, 3 * SIZE(AO4)
2126 FMADD y01, alpha3, a1, y01
2127 addi AO1, AO1, 4 * SIZE
2128 FMADD y02, alpha3, a2, y02
2129 addi AO2, AO2, 4 * SIZE
2130 FMADD y03, alpha3, a3, y03
2131 addi AO3, AO3, 4 * SIZE
2132 FMADD y04, alpha3, a4, y04
2133 addi AO4, AO4, 4 * SIZE
2135 FMADD y01, alpha4, a5, y01
2136 FMADD y02, alpha4, a6, y02
2137 FMADD y03, alpha4, a7, y03
2138 FMADD y04, alpha4, a8, y04
2140 STFD y01, 0 * SIZE(Y1)
2141 STFD y02, 1 * SIZE(Y1)
2142 STFD y03, 2 * SIZE(Y1)
2143 STFD y04, 3 * SIZE(Y1)
2144 addi Y1, Y1, 4 * SIZE
2151 LFD y01, 0 * SIZE(Y1)
2152 LFD y02, 1 * SIZE(Y1)
2154 LFD a1, 0 * SIZE(AO1)
2155 LFD a2, 1 * SIZE(AO1)
2156 LFD a3, 0 * SIZE(AO2)
2157 LFD a4, 1 * SIZE(AO2)
2159 LFD a5, 0 * SIZE(AO3)
2160 LFD a6, 1 * SIZE(AO3)
2161 LFD a7, 0 * SIZE(AO4)
2162 LFD a8, 1 * SIZE(AO4)
2164 FMADD y01, alpha1, a1, y01
2165 addi AO1, AO1, 2 * SIZE
2166 FMADD y02, alpha1, a2, y02
2167 addi AO2, AO2, 2 * SIZE
2168 FMADD y01, alpha2, a3, y01
2169 addi AO3, AO3, 2 * SIZE
2170 FMADD y02, alpha2, a4, y02
2171 addi AO4, AO4, 2 * SIZE
2173 FMADD y01, alpha3, a5, y01
2174 FMADD y02, alpha3, a6, y02
2175 FMADD y01, alpha4, a7, y01
2176 FMADD y02, alpha4, a8, y02
2178 STFD y01, 0 * SIZE(Y1)
2179 STFD y02, 1 * SIZE(Y1)
2180 addi Y1, Y1, 2 * SIZE
2187 LFD y01, 0 * SIZE(Y1)
2189 LFD a1, 0 * SIZE(AO1)
2190 LFD a2, 0 * SIZE(AO2)
2191 LFD a3, 0 * SIZE(AO3)
2192 LFD a4, 0 * SIZE(AO4)
2194 FMADD y01, alpha1, a1, y01
2195 FMADD y01, alpha2, a2, y01
2196 FMADD y01, alpha3, a3, y01
2197 FMADD y01, alpha4, a4, y01
2199 STFD y01, 0 * SIZE(Y1)
2208 LFD alpha1, 0 * SIZE(X)
2210 LFD alpha2, 0 * SIZE(X)
2213 FMUL alpha1, alpha, alpha1
2214 FMUL alpha2, alpha, alpha2
2227 LFD y01, 0 * SIZE(Y1)
2228 LFD y02, 1 * SIZE(Y1)
2229 LFD y03, 2 * SIZE(Y1)
2230 LFD y04, 3 * SIZE(Y1)
2231 LFD y05, 4 * SIZE(Y1)
2232 LFD y06, 5 * SIZE(Y1)
2233 LFD y07, 6 * SIZE(Y1)
2234 LFD y08, 7 * SIZE(Y1)
2235 LFD y09, 8 * SIZE(Y1)
2236 LFD y10, 9 * SIZE(Y1)
2237 LFD y11, 10 * SIZE(Y1)
2238 LFD y12, 11 * SIZE(Y1)
2239 LFD y13, 12 * SIZE(Y1)
2240 LFD y14, 13 * SIZE(Y1)
2241 LFD y15, 14 * SIZE(Y1)
2242 LFD y16, 15 * SIZE(Y1)
2244 LFD a1, 0 * SIZE(AO1)
2245 LFD a2, 1 * SIZE(AO1)
2246 LFD a3, 2 * SIZE(AO1)
2247 LFD a4, 3 * SIZE(AO1)
2248 LFD a5, 4 * SIZE(AO1)
2249 LFD a6, 5 * SIZE(AO1)
2250 LFD a7, 6 * SIZE(AO1)
2251 LFD a8, 7 * SIZE(AO1)
2256 FMADD y01, alpha1, a1, y01
2257 LFD a1, 8 * SIZE(AO1)
2258 FMADD y02, alpha1, a2, y02
2259 LFD a2, 9 * SIZE(AO1)
2260 FMADD y03, alpha1, a3, y03
2261 LFD a3, 10 * SIZE(AO1)
2262 FMADD y04, alpha1, a4, y04
2263 LFD a4, 11 * SIZE(AO1)
2265 FMADD y05, alpha1, a5, y05
2266 LFD a5, 12 * SIZE(AO1)
2267 FMADD y06, alpha1, a6, y06
2268 LFD a6, 13 * SIZE(AO1)
2269 FMADD y07, alpha1, a7, y07
2270 LFD a7, 14 * SIZE(AO1)
2271 FMADD y08, alpha1, a8, y08
2272 LFD a8, 15 * SIZE(AO1)
2274 FMADD y09, alpha1, a1, y09
2275 LFD a1, 0 * SIZE(AO2)
2276 FMADD y10, alpha1, a2, y10
2277 LFD a2, 1 * SIZE(AO2)
2278 FMADD y11, alpha1, a3, y11
2279 LFD a3, 2 * SIZE(AO2)
2280 FMADD y12, alpha1, a4, y12
2281 LFD a4, 3 * SIZE(AO2)
2283 FMADD y13, alpha1, a5, y13
2284 LFD a5, 4 * SIZE(AO2)
2285 FMADD y14, alpha1, a6, y14
2286 LFD a6, 5 * SIZE(AO2)
2287 FMADD y15, alpha1, a7, y15
2288 LFD a7, 6 * SIZE(AO2)
2289 FMADD y16, alpha1, a8, y16
2290 LFD a8, 7 * SIZE(AO2)
2292 FMADD y01, alpha2, a1, y01
2293 LFD a1, 8 * SIZE(AO2)
2294 FMADD y02, alpha2, a2, y02
2295 LFD a2, 9 * SIZE(AO2)
2296 FMADD y03, alpha2, a3, y03
2297 LFD a3, 10 * SIZE(AO2)
2298 FMADD y04, alpha2, a4, y04
2299 LFD a4, 11 * SIZE(AO2)
2301 FMADD y05, alpha2, a5, y05
2302 LFD a5, 12 * SIZE(AO2)
2303 FMADD y06, alpha2, a6, y06
2304 LFD a6, 13 * SIZE(AO2)
2305 FMADD y07, alpha2, a7, y07
2306 LFD a7, 14 * SIZE(AO2)
2307 FMADD y08, alpha2, a8, y08
2308 LFD a8, 15 * SIZE(AO2)
2310 FMADD y09, alpha2, a1, y09
2311 LFD a1, 16 * SIZE(AO1)
2312 FMADD y10, alpha2, a2, y10
2313 LFD a2, 17 * SIZE(AO1)
2314 FMADD y11, alpha2, a3, y11
2315 LFD a3, 18 * SIZE(AO1)
2316 FMADD y12, alpha2, a4, y12
2317 LFD a4, 19 * SIZE(AO1)
2319 FMADD y13, alpha2, a5, y13
2320 LFD a5, 20 * SIZE(AO1)
2321 FMADD y14, alpha2, a6, y14
2322 LFD a6, 21 * SIZE(AO1)
2323 FMADD y15, alpha2, a7, y15
2324 LFD a7, 22 * SIZE(AO1)
2325 FMADD y16, alpha2, a8, y16
2326 LFD a8, 23 * SIZE(AO1)
2328 STFD y01, 0 * SIZE(Y1)
2329 STFD y02, 1 * SIZE(Y1)
2330 STFD y03, 2 * SIZE(Y1)
2331 STFD y04, 3 * SIZE(Y1)
2333 LFD y01, 16 * SIZE(Y1)
2334 LFD y02, 17 * SIZE(Y1)
2335 LFD y03, 18 * SIZE(Y1)
2336 LFD y04, 19 * SIZE(Y1)
2338 STFD y05, 4 * SIZE(Y1)
2339 STFD y06, 5 * SIZE(Y1)
2340 STFD y07, 6 * SIZE(Y1)
2341 STFD y08, 7 * SIZE(Y1)
2343 LFD y05, 20 * SIZE(Y1)
2344 LFD y06, 21 * SIZE(Y1)
2345 LFD y07, 22 * SIZE(Y1)
2346 LFD y08, 23 * SIZE(Y1)
2348 STFD y09, 8 * SIZE(Y1)
2349 STFD y10, 9 * SIZE(Y1)
2350 STFD y11, 10 * SIZE(Y1)
2351 STFD y12, 11 * SIZE(Y1)
2353 LFD y09, 24 * SIZE(Y1)
2354 LFD y10, 25 * SIZE(Y1)
2355 LFD y11, 26 * SIZE(Y1)
2356 LFD y12, 27 * SIZE(Y1)
2358 STFD y13, 12 * SIZE(Y1)
2359 STFD y14, 13 * SIZE(Y1)
2360 STFD y15, 14 * SIZE(Y1)
2361 STFD y16, 15 * SIZE(Y1)
2363 LFD y13, 28 * SIZE(Y1)
2364 LFD y14, 29 * SIZE(Y1)
2365 LFD y15, 30 * SIZE(Y1)
2366 LFD y16, 31 * SIZE(Y1)
2368 addi AO1, AO1, 16 * SIZE
2369 addi AO2, AO2, 16 * SIZE
2370 addi Y1, Y1, 16 * SIZE
2380 FMADD y01, alpha1, a1, y01
2381 LFD a1, 8 * SIZE(AO1)
2382 FMADD y02, alpha1, a2, y02
2383 LFD a2, 9 * SIZE(AO1)
2384 FMADD y03, alpha1, a3, y03
2385 LFD a3, 10 * SIZE(AO1)
2386 FMADD y04, alpha1, a4, y04
2387 LFD a4, 11 * SIZE(AO1)
2389 FMADD y05, alpha1, a5, y05
2390 LFD a5, 12 * SIZE(AO1)
2391 FMADD y06, alpha1, a6, y06
2392 LFD a6, 13 * SIZE(AO1)
2393 FMADD y07, alpha1, a7, y07
2394 LFD a7, 14 * SIZE(AO1)
2395 FMADD y08, alpha1, a8, y08
2396 LFD a8, 15 * SIZE(AO1)
2398 FMADD y09, alpha1, a1, y09
2399 LFD a1, 0 * SIZE(AO2)
2400 FMADD y10, alpha1, a2, y10
2401 LFD a2, 1 * SIZE(AO2)
2402 FMADD y11, alpha1, a3, y11
2403 LFD a3, 2 * SIZE(AO2)
2404 FMADD y12, alpha1, a4, y12
2405 LFD a4, 3 * SIZE(AO2)
2407 FMADD y13, alpha1, a5, y13
2408 LFD a5, 4 * SIZE(AO2)
2409 FMADD y14, alpha1, a6, y14
2410 LFD a6, 5 * SIZE(AO2)
2411 FMADD y15, alpha1, a7, y15
2412 LFD a7, 6 * SIZE(AO2)
2413 FMADD y16, alpha1, a8, y16
2414 LFD a8, 7 * SIZE(AO2)
2416 FMADD y01, alpha2, a1, y01
2417 LFD a1, 8 * SIZE(AO2)
2418 FMADD y02, alpha2, a2, y02
2419 LFD a2, 9 * SIZE(AO2)
2420 FMADD y03, alpha2, a3, y03
2421 LFD a3, 10 * SIZE(AO2)
2422 FMADD y04, alpha2, a4, y04
2423 LFD a4, 11 * SIZE(AO2)
2425 FMADD y05, alpha2, a5, y05
2426 LFD a5, 12 * SIZE(AO2)
2427 FMADD y06, alpha2, a6, y06
2428 LFD a6, 13 * SIZE(AO2)
2429 FMADD y07, alpha2, a7, y07
2430 LFD a7, 14 * SIZE(AO2)
2431 FMADD y08, alpha2, a8, y08
2432 LFD a8, 15 * SIZE(AO2)
2434 FMADD y09, alpha2, a1, y09
2435 FMADD y10, alpha2, a2, y10
2436 FMADD y11, alpha2, a3, y11
2437 FMADD y12, alpha2, a4, y12
2438 FMADD y13, alpha2, a5, y13
2439 FMADD y14, alpha2, a6, y14
2440 FMADD y15, alpha2, a7, y15
2441 FMADD y16, alpha2, a8, y16
2443 STFD y01, 0 * SIZE(Y1)
2444 STFD y02, 1 * SIZE(Y1)
2445 STFD y03, 2 * SIZE(Y1)
2446 STFD y04, 3 * SIZE(Y1)
2447 STFD y05, 4 * SIZE(Y1)
2448 STFD y06, 5 * SIZE(Y1)
2449 STFD y07, 6 * SIZE(Y1)
2450 STFD y08, 7 * SIZE(Y1)
2451 STFD y09, 8 * SIZE(Y1)
2452 STFD y10, 9 * SIZE(Y1)
2453 STFD y11, 10 * SIZE(Y1)
2454 STFD y12, 11 * SIZE(Y1)
2455 STFD y13, 12 * SIZE(Y1)
2456 STFD y14, 13 * SIZE(Y1)
2457 STFD y15, 14 * SIZE(Y1)
2458 STFD y16, 15 * SIZE(Y1)
2460 addi AO1, AO1, 16 * SIZE
2461 addi AO2, AO2, 16 * SIZE
2462 addi Y1, Y1, 16 * SIZE
2472 LFD y01, 0 * SIZE(Y1)
2473 LFD y02, 1 * SIZE(Y1)
2474 LFD y03, 2 * SIZE(Y1)
2475 LFD y04, 3 * SIZE(Y1)
2476 LFD y05, 4 * SIZE(Y1)
2477 LFD y06, 5 * SIZE(Y1)
2478 LFD y07, 6 * SIZE(Y1)
2479 LFD y08, 7 * SIZE(Y1)
2481 LFD a1, 0 * SIZE(AO1)
2482 LFD a2, 1 * SIZE(AO1)
2483 LFD a3, 2 * SIZE(AO1)
2484 LFD a4, 3 * SIZE(AO1)
2485 LFD a5, 4 * SIZE(AO1)
2486 LFD a6, 5 * SIZE(AO1)
2487 LFD a7, 6 * SIZE(AO1)
2488 LFD a8, 7 * SIZE(AO1)
2490 FMADD y01, alpha1, a1, y01
2491 LFD a1, 0 * SIZE(AO2)
2492 FMADD y02, alpha1, a2, y02
2493 LFD a2, 1 * SIZE(AO2)
2494 FMADD y03, alpha1, a3, y03
2495 LFD a3, 2 * SIZE(AO2)
2496 FMADD y04, alpha1, a4, y04
2497 LFD a4, 3 * SIZE(AO2)
2498 FMADD y05, alpha1, a5, y05
2499 LFD a5, 4 * SIZE(AO2)
2500 FMADD y06, alpha1, a6, y06
2501 LFD a6, 5 * SIZE(AO2)
2502 FMADD y07, alpha1, a7, y07
2503 LFD a7, 6 * SIZE(AO2)
2504 FMADD y08, alpha1, a8, y08
2505 LFD a8, 7 * SIZE(AO2)
2507 FMADD y01, alpha2, a1, y01
2508 FMADD y02, alpha2, a2, y02
2509 FMADD y03, alpha2, a3, y03
2510 FMADD y04, alpha2, a4, y04
2511 FMADD y05, alpha2, a5, y05
2512 FMADD y06, alpha2, a6, y06
2513 FMADD y07, alpha2, a7, y07
2514 FMADD y08, alpha2, a8, y08
2516 STFD y01, 0 * SIZE(Y1)
2517 STFD y02, 1 * SIZE(Y1)
2518 STFD y03, 2 * SIZE(Y1)
2519 STFD y04, 3 * SIZE(Y1)
2520 STFD y05, 4 * SIZE(Y1)
2521 STFD y06, 5 * SIZE(Y1)
2522 STFD y07, 6 * SIZE(Y1)
2523 STFD y08, 7 * SIZE(Y1)
2525 addi AO1, AO1, 8 * SIZE
2526 addi AO2, AO2, 8 * SIZE
2527 addi Y1, Y1, 8 * SIZE
2534 LFD y01, 0 * SIZE(Y1)
2535 LFD y02, 1 * SIZE(Y1)
2536 LFD y03, 2 * SIZE(Y1)
2537 LFD y04, 3 * SIZE(Y1)
2539 LFD a1, 0 * SIZE(AO1)
2540 LFD a2, 1 * SIZE(AO1)
2541 LFD a3, 2 * SIZE(AO1)
2542 LFD a4, 3 * SIZE(AO1)
2544 LFD a5, 0 * SIZE(AO2)
2545 LFD a6, 1 * SIZE(AO2)
2546 LFD a7, 2 * SIZE(AO2)
2547 LFD a8, 3 * SIZE(AO2)
2549 FMADD y01, alpha1, a1, y01
2550 FMADD y02, alpha1, a2, y02
2551 FMADD y03, alpha1, a3, y03
2552 FMADD y04, alpha1, a4, y04
2554 FMADD y01, alpha2, a5, y01
2555 FMADD y02, alpha2, a6, y02
2556 FMADD y03, alpha2, a7, y03
2557 FMADD y04, alpha2, a8, y04
2559 STFD y01, 0 * SIZE(Y1)
2560 STFD y02, 1 * SIZE(Y1)
2561 STFD y03, 2 * SIZE(Y1)
2562 STFD y04, 3 * SIZE(Y1)
2564 addi AO1, AO1, 4 * SIZE
2565 addi AO2, AO2, 4 * SIZE
2566 addi Y1, Y1, 4 * SIZE
2573 LFD y01, 0 * SIZE(Y1)
2574 LFD y02, 1 * SIZE(Y1)
2576 LFD a1, 0 * SIZE(AO1)
2577 LFD a2, 1 * SIZE(AO1)
2578 LFD a3, 0 * SIZE(AO2)
2579 LFD a4, 1 * SIZE(AO2)
2581 FMADD y01, alpha1, a1, y01
2582 FMADD y02, alpha1, a2, y02
2583 FMADD y01, alpha2, a3, y01
2584 FMADD y02, alpha2, a4, y02
2586 STFD y01, 0 * SIZE(Y1)
2587 STFD y02, 1 * SIZE(Y1)
2589 addi AO1, AO1, 2 * SIZE
2590 addi AO2, AO2, 2 * SIZE
2591 addi Y1, Y1, 2 * SIZE
2598 LFD y01, 0 * SIZE(Y1)
2600 LFD a1, 0 * SIZE(AO1)
2601 LFD a2, 0 * SIZE(AO2)
2603 FMADD y01, alpha1, a1, y01
2604 FMADD y01, alpha2, a2, y01
2606 STFD y01, 0 * SIZE(Y1)
2615 LFD alpha1, 0 * SIZE(X)
2616 FMUL alpha1, alpha, alpha1
2625 LFD y01, 0 * SIZE(Y1)
2626 LFD y02, 1 * SIZE(Y1)
2627 LFD y03, 2 * SIZE(Y1)
2628 LFD y04, 3 * SIZE(Y1)
2629 LFD y05, 4 * SIZE(Y1)
2630 LFD y06, 5 * SIZE(Y1)
2631 LFD y07, 6 * SIZE(Y1)
2632 LFD y08, 7 * SIZE(Y1)
2634 LFD a1, 0 * SIZE(AO1)
2635 LFD a2, 1 * SIZE(AO1)
2636 LFD a3, 2 * SIZE(AO1)
2637 LFD a4, 3 * SIZE(AO1)
2638 LFD a5, 4 * SIZE(AO1)
2639 LFD a6, 5 * SIZE(AO1)
2640 LFD a7, 6 * SIZE(AO1)
2641 LFD a8, 7 * SIZE(AO1)
2643 LFD y09, 8 * SIZE(Y1)
2644 LFD y10, 9 * SIZE(Y1)
2645 LFD y11, 10 * SIZE(Y1)
2646 LFD y12, 11 * SIZE(Y1)
2647 LFD y13, 12 * SIZE(Y1)
2648 LFD y14, 13 * SIZE(Y1)
2649 LFD y15, 14 * SIZE(Y1)
2650 LFD y16, 15 * SIZE(Y1)
2656 FMADD y01, alpha1, a1, y01
2657 LFD a1, 8 * SIZE(AO1)
2658 FMADD y02, alpha1, a2, y02
2659 LFD a2, 9 * SIZE(AO1)
2660 FMADD y03, alpha1, a3, y03
2661 LFD a3, 10 * SIZE(AO1)
2662 FMADD y04, alpha1, a4, y04
2663 LFD a4, 11 * SIZE(AO1)
2665 FMADD y05, alpha1, a5, y05
2666 LFD a5, 12 * SIZE(AO1)
2667 FMADD y06, alpha1, a6, y06
2668 LFD a6, 13 * SIZE(AO1)
2669 FMADD y07, alpha1, a7, y07
2670 LFD a7, 14 * SIZE(AO1)
2671 FMADD y08, alpha1, a8, y08
2672 LFD a8, 15 * SIZE(AO1)
2674 FMADD y09, alpha1, a1, y09
2675 LFD a1, 16 * SIZE(AO1)
2676 FMADD y10, alpha1, a2, y10
2677 LFD a2, 17 * SIZE(AO1)
2678 FMADD y11, alpha1, a3, y11
2679 LFD a3, 18 * SIZE(AO1)
2680 FMADD y12, alpha1, a4, y12
2681 LFD a4, 19 * SIZE(AO1)
2683 FMADD y13, alpha1, a5, y13
2684 LFD a5, 20 * SIZE(AO1)
2685 FMADD y14, alpha1, a6, y14
2686 LFD a6, 21 * SIZE(AO1)
2687 FMADD y15, alpha1, a7, y15
2688 LFD a7, 22 * SIZE(AO1)
2689 FMADD y16, alpha1, a8, y16
2690 LFD a8, 23 * SIZE(AO1)
2692 STFD y01, 0 * SIZE(Y1)
2693 LFD y01, 16 * SIZE(Y1)
2694 STFD y02, 1 * SIZE(Y1)
2695 LFD y02, 17 * SIZE(Y1)
2697 STFD y03, 2 * SIZE(Y1)
2698 LFD y03, 18 * SIZE(Y1)
2699 STFD y04, 3 * SIZE(Y1)
2700 LFD y04, 19 * SIZE(Y1)
2702 STFD y05, 4 * SIZE(Y1)
2703 LFD y05, 20 * SIZE(Y1)
2704 STFD y06, 5 * SIZE(Y1)
2705 LFD y06, 21 * SIZE(Y1)
2707 STFD y07, 6 * SIZE(Y1)
2708 LFD y07, 22 * SIZE(Y1)
2709 STFD y08, 7 * SIZE(Y1)
2710 LFD y08, 23 * SIZE(Y1)
2712 STFD y09, 8 * SIZE(Y1)
2713 LFD y09, 24 * SIZE(Y1)
2714 STFD y10, 9 * SIZE(Y1)
2715 LFD y10, 25 * SIZE(Y1)
2717 STFD y11, 10 * SIZE(Y1)
2718 LFD y11, 26 * SIZE(Y1)
2719 STFD y12, 11 * SIZE(Y1)
2720 LFD y12, 27 * SIZE(Y1)
2722 STFD y13, 12 * SIZE(Y1)
2723 LFD y13, 28 * SIZE(Y1)
2724 STFD y14, 13 * SIZE(Y1)
2725 LFD y14, 29 * SIZE(Y1)
2727 STFD y15, 14 * SIZE(Y1)
2728 LFD y15, 30 * SIZE(Y1)
2729 STFD y16, 15 * SIZE(Y1)
2730 LFD y16, 31 * SIZE(Y1)
2732 addi AO1, AO1, 16 * SIZE
2733 addi Y1, Y1, 16 * SIZE
2742 FMADD y01, alpha1, a1, y01
2743 LFD a1, 8 * SIZE(AO1)
2744 FMADD y02, alpha1, a2, y02
2745 LFD a2, 9 * SIZE(AO1)
2746 FMADD y03, alpha1, a3, y03
2747 LFD a3, 10 * SIZE(AO1)
2748 FMADD y04, alpha1, a4, y04
2749 LFD a4, 11 * SIZE(AO1)
2751 FMADD y05, alpha1, a5, y05
2752 LFD a5, 12 * SIZE(AO1)
2753 FMADD y06, alpha1, a6, y06
2754 LFD a6, 13 * SIZE(AO1)
2755 FMADD y07, alpha1, a7, y07
2756 LFD a7, 14 * SIZE(AO1)
2757 FMADD y08, alpha1, a8, y08
2758 LFD a8, 15 * SIZE(AO1)
2760 FMADD y09, alpha1, a1, y09
2761 FMADD y10, alpha1, a2, y10
2762 FMADD y11, alpha1, a3, y11
2763 FMADD y12, alpha1, a4, y12
2764 FMADD y13, alpha1, a5, y13
2765 FMADD y14, alpha1, a6, y14
2766 FMADD y15, alpha1, a7, y15
2767 FMADD y16, alpha1, a8, y16
2769 STFD y01, 0 * SIZE(Y1)
2770 STFD y02, 1 * SIZE(Y1)
2771 STFD y03, 2 * SIZE(Y1)
2772 STFD y04, 3 * SIZE(Y1)
2773 STFD y05, 4 * SIZE(Y1)
2774 STFD y06, 5 * SIZE(Y1)
2775 STFD y07, 6 * SIZE(Y1)
2776 STFD y08, 7 * SIZE(Y1)
2778 STFD y09, 8 * SIZE(Y1)
2779 STFD y10, 9 * SIZE(Y1)
2780 STFD y11, 10 * SIZE(Y1)
2781 STFD y12, 11 * SIZE(Y1)
2782 STFD y13, 12 * SIZE(Y1)
2783 STFD y14, 13 * SIZE(Y1)
2784 STFD y15, 14 * SIZE(Y1)
2785 STFD y16, 15 * SIZE(Y1)
2787 addi AO1, AO1, 16 * SIZE
2788 addi Y1, Y1, 16 * SIZE
2798 LFD y01, 0 * SIZE(Y1)
2799 LFD y02, 1 * SIZE(Y1)
2800 LFD y03, 2 * SIZE(Y1)
2801 LFD y04, 3 * SIZE(Y1)
2803 LFD a1, 0 * SIZE(AO1)
2804 LFD a2, 1 * SIZE(AO1)
2805 LFD a3, 2 * SIZE(AO1)
2806 LFD a4, 3 * SIZE(AO1)
2808 LFD y05, 4 * SIZE(Y1)
2809 LFD y06, 5 * SIZE(Y1)
2810 LFD y07, 6 * SIZE(Y1)
2811 LFD y08, 7 * SIZE(Y1)
2813 LFD a5, 4 * SIZE(AO1)
2814 LFD a6, 5 * SIZE(AO1)
2815 LFD a7, 6 * SIZE(AO1)
2816 LFD a8, 7 * SIZE(AO1)
2818 FMADD y01, alpha1, a1, y01
2819 FMADD y02, alpha1, a2, y02
2820 FMADD y03, alpha1, a3, y03
2821 FMADD y04, alpha1, a4, y04
2823 FMADD y05, alpha1, a5, y05
2824 FMADD y06, alpha1, a6, y06
2825 FMADD y07, alpha1, a7, y07
2826 FMADD y08, alpha1, a8, y08
2828 STFD y01, 0 * SIZE(Y1)
2829 STFD y02, 1 * SIZE(Y1)
2830 STFD y03, 2 * SIZE(Y1)
2831 STFD y04, 3 * SIZE(Y1)
2833 STFD y05, 4 * SIZE(Y1)
2834 STFD y06, 5 * SIZE(Y1)
2835 STFD y07, 6 * SIZE(Y1)
2836 STFD y08, 7 * SIZE(Y1)
2838 addi AO1, AO1, 8 * SIZE
2839 addi Y1, Y1, 8 * SIZE
2846 LFD y01, 0 * SIZE(Y1)
2847 LFD y02, 1 * SIZE(Y1)
2848 LFD y03, 2 * SIZE(Y1)
2849 LFD y04, 3 * SIZE(Y1)
2851 LFD a1, 0 * SIZE(AO1)
2852 LFD a2, 1 * SIZE(AO1)
2853 LFD a3, 2 * SIZE(AO1)
2854 LFD a4, 3 * SIZE(AO1)
2856 FMADD y01, alpha1, a1, y01
2857 FMADD y02, alpha1, a2, y02
2858 FMADD y03, alpha1, a3, y03
2859 FMADD y04, alpha1, a4, y04
2861 STFD y01, 0 * SIZE(Y1)
2862 STFD y02, 1 * SIZE(Y1)
2863 STFD y03, 2 * SIZE(Y1)
2864 STFD y04, 3 * SIZE(Y1)
2866 addi AO1, AO1, 4 * SIZE
2867 addi Y1, Y1, 4 * SIZE
2874 LFD y01, 0 * SIZE(Y1)
2875 LFD y02, 1 * SIZE(Y1)
2877 LFD a1, 0 * SIZE(AO1)
2878 LFD a2, 1 * SIZE(AO1)
2880 FMADD y01, alpha1, a1, y01
2881 FMADD y02, alpha1, a2, y02
2883 STFD y01, 0 * SIZE(Y1)
2884 STFD y02, 1 * SIZE(Y1)
2886 addi AO1, AO1, 2 * SIZE
2887 addi Y1, Y1, 2 * SIZE
2894 LFD y01, 0 * SIZE(Y1)
2895 LFD a1, 0 * SIZE(AO1)
2897 FMADD y01, alpha1, a1, y01
2898 STFD y01, 0 * SIZE(Y1)
2902 cmpi cr0, 0, INCY, SIZE
2931 LFD f8, 0 * SIZE(YY)
2932 LFD f9, 1 * SIZE(YY)
2933 LFD f10, 2 * SIZE(YY)
2934 LFD f11, 3 * SIZE(YY)
2935 LFD f12, 4 * SIZE(YY)
2936 LFD f13, 5 * SIZE(YY)
2937 LFD f14, 6 * SIZE(YY)
2938 LFD f15, 7 * SIZE(YY)
2939 addi YY, YY, 8 * SIZE
2950 STFD f8, 0 * SIZE(Y1)
2952 STFD f9, 0 * SIZE(Y1)
2954 STFD f10, 0 * SIZE(Y1)
2956 STFD f11, 0 * SIZE(Y1)
2958 STFD f12, 0 * SIZE(Y1)
2960 STFD f13, 0 * SIZE(Y1)
2962 STFD f14, 0 * SIZE(Y1)
2964 STFD f15, 0 * SIZE(Y1)
2982 LFD f8, 0 * SIZE(YY)
2983 LFD f9, 1 * SIZE(YY)
2984 LFD f10, 2 * SIZE(YY)
2985 LFD f11, 3 * SIZE(YY)
2986 addi YY, YY, 4 * SIZE
2993 STFD f8, 0 * SIZE(Y1)
2995 STFD f9, 0 * SIZE(Y1)
2997 STFD f10, 0 * SIZE(Y1)
2999 STFD f11, 0 * SIZE(Y1)
3012 LFD f8, 0 * SIZE(YY)
3013 LFD f9, 1 * SIZE(YY)
3014 addi YY, YY, 2 * SIZE
3019 STFD f8, 0 * SIZE(Y1)
3021 STFD f9, 0 * SIZE(Y1)
3030 LFD f8, 0 * SIZE(YY)
3034 STFD f8, 0 * SIZE(Y1)
3091 addi SP, SP, STACKSIZE