1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/* Platform/ABI conditionals: AIX or Apple targets, then builds without */
/* __64BIT__ that have DOUBLE defined. The definitions these guard and */
/* their closing #endif lines are not visible in this excerpt. */
64 #if defined(_AIX) || defined(__APPLE__)
65 #if !defined(__64BIT__) && defined(DOUBLE)
/* PREFETCHSIZE_A and PREFETCHSIZE_C are multiplied by SIZE and loaded */
/* into PREA and PREC by the kernel setup code */
/* (li PREA, PREFETCHSIZE_A * SIZE / li PREC, PREFETCHSIZE_C * SIZE). */
/* NOTE(review): each repeated pair below is presumably wrapped in its */
/* own per-CPU #if/#ifdef; only the PPC440/PPC440FP2 guard is visible */
/* here. Confirm the enclosing guard before editing any single pair. */
108 #define PREFETCHSIZE_A 42
109 #define PREFETCHSIZE_C 16
112 #if defined(PPC440) || defined(PPC440FP2)
113 #define PREFETCHSIZE_A 42
114 #define PREFETCHSIZE_C 16
118 #define PREFETCHSIZE_A 42
119 #define PREFETCHSIZE_C 16
123 #define PREFETCHSIZE_A 42
124 #define PREFETCHSIZE_C 16
128 #define PREFETCHSIZE_A 48
129 #define PREFETCHSIZE_C 16
133 #define PREFETCHSIZE_A 40
134 #define PREFETCHSIZE_C 8
138 #define PREFETCHSIZE_A 96
139 #define PREFETCHSIZE_C 8
143 #define PREFETCHSIZE_A 96
144 #define PREFETCHSIZE_C 8
/* STACKSIZE is the size of the local stack frame: the kernel lowers SP */
/* by this amount (addi SP, SP, -STACKSIZE) and adds it back into the */
/* offset when reading caller-frame argument slots */
/* (FRAMESLOT(n) + STACKSIZE(SP)). */
/* NOTE(review): the two values are alternatives; the conditional that */
/* chooses between them is not visible in this excerpt. */
189 #define STACKSIZE 224
191 #define STACKSIZE 288
/* Named slots at byte offsets 144 and 152 from SP. */
/* NOTE(review): the names suggest a floating-point zero constant and a */
/* saved copy of the alpha scalar; their uses are not visible here, so */
/* confirm against the full file. */
194 #define FZERO 144(SP)
195 #define ALPHA 152(SP)
200 addi SP, SP, -STACKSIZE
265 lwz INCY, FRAMESLOT(0) + STACKSIZE(SP)
266 lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP)
268 ld Y, FRAMESLOT(0) + STACKSIZE(SP)
269 ld INCY, FRAMESLOT(1) + STACKSIZE(SP)
270 ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
274 #if defined(_AIX) || defined(__APPLE__)
277 lwz INCX, FRAMESLOT(0) + STACKSIZE(SP)
278 lwz Y, FRAMESLOT(1) + STACKSIZE(SP)
279 lwz INCY, FRAMESLOT(2) + STACKSIZE(SP)
280 lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP)
282 lwz Y, FRAMESLOT(0) + STACKSIZE(SP)
283 lwz INCY, FRAMESLOT(1) + STACKSIZE(SP)
284 lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
287 ld Y, FRAMESLOT(0) + STACKSIZE(SP)
288 ld INCY, FRAMESLOT(1) + STACKSIZE(SP)
289 ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
295 subf PLDA_M, XP, PLDA_M
296 slwi PLDA_M, PLDA_M, BASE_SHIFT
298 slwi LDA, LDA, BASE_SHIFT
299 slwi INCX, INCX, BASE_SHIFT
300 slwi INCY, INCY, BASE_SHIFT
308 li PREA, PREFETCHSIZE_A * SIZE
309 li PREC, PREFETCHSIZE_C * SIZE
320 slwi r0, IS, BASE_SHIFT
321 cmpi cr0, 0, MIN_N, P
326 cmpi cr0, 0, INCX, SIZE
330 addi CO, BUFFER, -SIZE
355 STFD f0, 1 * SIZE(CO)
356 STFD f1, 2 * SIZE(CO)
357 STFD f2, 3 * SIZE(CO)
358 STFD f3, 4 * SIZE(CO)
359 STFD f4, 5 * SIZE(CO)
360 STFD f5, 6 * SIZE(CO)
361 STFD f6, 7 * SIZE(CO)
362 STFDU f7, 8 * SIZE(CO)
375 STFDU f0, 1 * SIZE(CO)
422 LFD a1, 1 * SIZE(AO1)
423 LFD a2, 1 * SIZE(AO2)
424 LFD a3, 1 * SIZE(AO3)
425 LFD a4, 1 * SIZE(AO4)
426 LFD a5, 1 * SIZE(AO5)
427 LFD a6, 1 * SIZE(AO6)
428 LFD a7, 1 * SIZE(AO7)
429 LFD a8, 1 * SIZE(AO8)
443 FMADD y01, a1, b1, y01
444 LFD a1, 2 * SIZE(AO1)
445 FMADD y02, a2, b1, y02
446 LFD a2, 2 * SIZE(AO2)
448 FMADD y03, a3, b1, y03
449 LFD a3, 2 * SIZE(AO3)
450 FMADD y04, a4, b1, y04
451 LFD a4, 2 * SIZE(AO4)
453 FMADD y05, a5, b1, y05
454 LFD a5, 2 * SIZE(AO5)
455 FMADD y06, a6, b1, y06
456 LFD a6, 2 * SIZE(AO6)
458 FMADD y07, a7, b1, y07
459 LFD a7, 2 * SIZE(AO7)
460 FMADD y08, a8, b1, y08
461 LFD a8, 2 * SIZE(AO8)
463 FMADD y09, a1, b2, y09
464 LFD a1, 3 * SIZE(AO1)
465 FMADD y10, a2, b2, y10
466 LFD a2, 3 * SIZE(AO2)
468 FMADD y11, a3, b2, y11
469 LFD a3, 3 * SIZE(AO3)
470 FMADD y12, a4, b2, y12
471 LFD a4, 3 * SIZE(AO4)
473 FMADD y13, a5, b2, y13
474 LFD a5, 3 * SIZE(AO5)
475 FMADD y14, a6, b2, y14
476 LFD a6, 3 * SIZE(AO6)
478 FMADD y15, a7, b2, y15
479 LFD a7, 3 * SIZE(AO7)
480 FMADD y16, a8, b2, y16
481 LFD a8, 3 * SIZE(AO8)
483 FMADD y01, a1, b3, y01
484 LFD a1, 4 * SIZE(AO1)
485 FMADD y02, a2, b3, y02
486 LFD a2, 4 * SIZE(AO2)
488 FMADD y03, a3, b3, y03
489 LFD a3, 4 * SIZE(AO3)
490 FMADD y04, a4, b3, y04
491 LFD a4, 4 * SIZE(AO4)
493 FMADD y05, a5, b3, y05
494 LFD a5, 4 * SIZE(AO5)
495 FMADD y06, a6, b3, y06
496 LFD a6, 4 * SIZE(AO6)
498 FMADD y07, a7, b3, y07
499 LFD a7, 4 * SIZE(AO7)
500 FMADD y08, a8, b3, y08
501 LFD a8, 4 * SIZE(AO8)
503 FMADD y09, a1, b4, y09
504 LFD a1, 5 * SIZE(AO1)
505 FMADD y10, a2, b4, y10
506 LFD a2, 5 * SIZE(AO2)
508 FMADD y11, a3, b4, y11
509 LFD a3, 5 * SIZE(AO3)
510 FMADD y12, a4, b4, y12
511 LFD a4, 5 * SIZE(AO4)
513 FMADD y13, a5, b4, y13
514 LFD a5, 5 * SIZE(AO5)
515 FMADD y14, a6, b4, y14
516 LFD a6, 5 * SIZE(AO6)
518 FMADD y15, a7, b4, y15
519 LFD a7, 5 * SIZE(AO7)
520 FMADD y16, a8, b4, y16
521 LFD a8, 5 * SIZE(AO8)
524 LFD b2, 10 * SIZE(BO)
525 LFD b3, 11 * SIZE(BO)
526 LFD b4, 12 * SIZE(BO)
528 FMADD y01, a1, b5, y01
529 LFD a1, 6 * SIZE(AO1)
530 FMADD y02, a2, b5, y02
531 LFD a2, 6 * SIZE(AO2)
533 FMADD y03, a3, b5, y03
534 LFD a3, 6 * SIZE(AO3)
535 FMADD y04, a4, b5, y04
536 LFD a4, 6 * SIZE(AO4)
538 FMADD y05, a5, b5, y05
539 LFD a5, 6 * SIZE(AO5)
540 FMADD y06, a6, b5, y06
541 LFD a6, 6 * SIZE(AO6)
543 FMADD y07, a7, b5, y07
544 LFD a7, 6 * SIZE(AO7)
545 FMADD y08, a8, b5, y08
546 LFD a8, 6 * SIZE(AO8)
548 FMADD y09, a1, b6, y09
549 LFD a1, 7 * SIZE(AO1)
550 FMADD y10, a2, b6, y10
551 LFD a2, 7 * SIZE(AO2)
553 FMADD y11, a3, b6, y11
554 LFD a3, 7 * SIZE(AO3)
555 FMADD y12, a4, b6, y12
556 LFD a4, 7 * SIZE(AO4)
558 FMADD y13, a5, b6, y13
559 LFD a5, 7 * SIZE(AO5)
560 FMADD y14, a6, b6, y14
561 LFD a6, 7 * SIZE(AO6)
563 FMADD y15, a7, b6, y15
564 LFD a7, 7 * SIZE(AO7)
565 FMADD y16, a8, b6, y16
566 LFD a8, 7 * SIZE(AO8)
568 FMADD y01, a1, b7, y01
569 LFD a1, 8 * SIZE(AO1)
570 FMADD y02, a2, b7, y02
571 LFD a2, 8 * SIZE(AO2)
573 FMADD y03, a3, b7, y03
574 LFD a3, 8 * SIZE(AO3)
575 FMADD y04, a4, b7, y04
576 LFD a4, 8 * SIZE(AO4)
578 FMADD y05, a5, b7, y05
579 LFD a5, 8 * SIZE(AO5)
580 FMADD y06, a6, b7, y06
581 LFD a6, 8 * SIZE(AO6)
583 FMADD y07, a7, b7, y07
584 LFD a7, 8 * SIZE(AO7)
585 FMADD y08, a8, b7, y08
586 LFD a8, 8 * SIZE(AO8)
588 FMADD y09, a1, b8, y09
589 LFD a1, 9 * SIZE(AO1)
590 FMADD y10, a2, b8, y10
591 LFD a2, 9 * SIZE(AO2)
593 FMADD y11, a3, b8, y11
594 LFD a3, 9 * SIZE(AO3)
595 FMADD y12, a4, b8, y12
596 LFD a4, 9 * SIZE(AO4)
598 FMADD y13, a5, b8, y13
599 LFD a5, 9 * SIZE(AO5)
600 FMADD y14, a6, b8, y14
601 LFD a6, 9 * SIZE(AO6)
603 FMADD y15, a7, b8, y15
604 LFD a7, 9 * SIZE(AO7)
605 FMADD y16, a8, b8, y16
606 LFD a8, 9 * SIZE(AO8)
608 LFD b5, 13 * SIZE(BO)
609 LFD b6, 14 * SIZE(BO)
610 LFD b7, 15 * SIZE(BO)
611 LFD b8, 16 * SIZE(BO)
618 FMADD y01, a1, b1, y01
619 LFD a1, 10 * SIZE(AO1)
620 FMADD y02, a2, b1, y02
621 LFD a2, 10 * SIZE(AO2)
623 FMADD y03, a3, b1, y03
624 LFD a3, 10 * SIZE(AO3)
625 FMADD y04, a4, b1, y04
626 LFD a4, 10 * SIZE(AO4)
628 FMADD y05, a5, b1, y05
629 LFD a5, 10 * SIZE(AO5)
630 FMADD y06, a6, b1, y06
631 LFD a6, 10 * SIZE(AO6)
633 FMADD y07, a7, b1, y07
634 LFD a7, 10 * SIZE(AO7)
635 FMADD y08, a8, b1, y08
636 LFD a8, 10 * SIZE(AO8)
638 FMADD y09, a1, b2, y09
639 LFD a1, 11 * SIZE(AO1)
640 FMADD y10, a2, b2, y10
641 LFD a2, 11 * SIZE(AO2)
643 FMADD y11, a3, b2, y11
644 LFD a3, 11 * SIZE(AO3)
645 FMADD y12, a4, b2, y12
646 LFD a4, 11 * SIZE(AO4)
648 FMADD y13, a5, b2, y13
649 LFD a5, 11 * SIZE(AO5)
650 FMADD y14, a6, b2, y14
651 LFD a6, 11 * SIZE(AO6)
653 FMADD y15, a7, b2, y15
654 LFD a7, 11 * SIZE(AO7)
655 FMADD y16, a8, b2, y16
656 LFD a8, 11 * SIZE(AO8)
658 FMADD y01, a1, b3, y01
659 LFD a1, 12 * SIZE(AO1)
660 FMADD y02, a2, b3, y02
661 LFD a2, 12 * SIZE(AO2)
663 FMADD y03, a3, b3, y03
664 LFD a3, 12 * SIZE(AO3)
665 FMADD y04, a4, b3, y04
666 LFD a4, 12 * SIZE(AO4)
668 FMADD y05, a5, b3, y05
669 LFD a5, 12 * SIZE(AO5)
670 FMADD y06, a6, b3, y06
671 LFD a6, 12 * SIZE(AO6)
673 FMADD y07, a7, b3, y07
674 LFD a7, 12 * SIZE(AO7)
675 FMADD y08, a8, b3, y08
676 LFD a8, 12 * SIZE(AO8)
678 FMADD y09, a1, b4, y09
679 LFD a1, 13 * SIZE(AO1)
680 FMADD y10, a2, b4, y10
681 LFD a2, 13 * SIZE(AO2)
683 FMADD y11, a3, b4, y11
684 LFD a3, 13 * SIZE(AO3)
685 FMADD y12, a4, b4, y12
686 LFD a4, 13 * SIZE(AO4)
688 FMADD y13, a5, b4, y13
689 LFD a5, 13 * SIZE(AO5)
690 FMADD y14, a6, b4, y14
691 LFD a6, 13 * SIZE(AO6)
693 FMADD y15, a7, b4, y15
694 LFD a7, 13 * SIZE(AO7)
695 FMADD y16, a8, b4, y16
696 LFD a8, 13 * SIZE(AO8)
698 LFD b1, 17 * SIZE(BO)
699 LFD b2, 18 * SIZE(BO)
700 LFD b3, 19 * SIZE(BO)
701 LFD b4, 20 * SIZE(BO)
703 FMADD y01, a1, b5, y01
704 LFD a1, 14 * SIZE(AO1)
705 FMADD y02, a2, b5, y02
706 LFD a2, 14 * SIZE(AO2)
708 FMADD y03, a3, b5, y03
709 LFD a3, 14 * SIZE(AO3)
710 FMADD y04, a4, b5, y04
711 LFD a4, 14 * SIZE(AO4)
713 FMADD y05, a5, b5, y05
714 LFD a5, 14 * SIZE(AO5)
715 FMADD y06, a6, b5, y06
716 LFD a6, 14 * SIZE(AO6)
718 FMADD y07, a7, b5, y07
719 LFD a7, 14 * SIZE(AO7)
720 FMADD y08, a8, b5, y08
721 LFD a8, 14 * SIZE(AO8)
723 FMADD y09, a1, b6, y09
724 LFD a1, 15 * SIZE(AO1)
725 FMADD y10, a2, b6, y10
726 LFD a2, 15 * SIZE(AO2)
728 FMADD y11, a3, b6, y11
729 LFD a3, 15 * SIZE(AO3)
730 FMADD y12, a4, b6, y12
731 LFD a4, 15 * SIZE(AO4)
733 FMADD y13, a5, b6, y13
734 LFD a5, 15 * SIZE(AO5)
735 FMADD y14, a6, b6, y14
736 LFD a6, 15 * SIZE(AO6)
738 FMADD y15, a7, b6, y15
739 LFD a7, 15 * SIZE(AO7)
740 FMADD y16, a8, b6, y16
741 LFD a8, 15 * SIZE(AO8)
743 FMADD y01, a1, b7, y01
744 LFD a1, 16 * SIZE(AO1)
745 FMADD y02, a2, b7, y02
746 LFD a2, 16 * SIZE(AO2)
748 FMADD y03, a3, b7, y03
749 LFD a3, 16 * SIZE(AO3)
750 FMADD y04, a4, b7, y04
751 LFD a4, 16 * SIZE(AO4)
753 FMADD y05, a5, b7, y05
754 LFD a5, 16 * SIZE(AO5)
755 FMADD y06, a6, b7, y06
756 LFD a6, 16 * SIZE(AO6)
758 FMADD y07, a7, b7, y07
759 LFD a7, 16 * SIZE(AO7)
760 FMADD y08, a8, b7, y08
761 LFD a8, 16 * SIZE(AO8)
763 FMADD y09, a1, b8, y09
764 LFD a1, 17 * SIZE(AO1)
765 FMADD y10, a2, b8, y10
766 LFD a2, 17 * SIZE(AO2)
768 FMADD y11, a3, b8, y11
769 LFD a3, 17 * SIZE(AO3)
770 FMADD y12, a4, b8, y12
771 LFD a4, 17 * SIZE(AO4)
773 addi AO1, AO1, 16 * SIZE
774 addi AO2, AO2, 16 * SIZE
775 addi AO3, AO3, 16 * SIZE
776 addi AO4, AO4, 16 * SIZE
778 FMADD y13, a5, b8, y13
779 LFD a5, 17 * SIZE(AO5)
780 FMADD y14, a6, b8, y14
781 LFD a6, 17 * SIZE(AO6)
783 FMADD y15, a7, b8, y15
784 LFD a7, 17 * SIZE(AO7)
785 FMADD y16, a8, b8, y16
786 LFD a8, 17 * SIZE(AO8)
788 LFD b5, 21 * SIZE(BO)
789 LFD b6, 22 * SIZE(BO)
790 LFD b7, 23 * SIZE(BO)
791 LFD b8, 24 * SIZE(BO)
793 addi AO5, AO5, 16 * SIZE
794 addi AO6, AO6, 16 * SIZE
798 addi AO7, AO7, 16 * SIZE
799 addi AO8, AO8, 16 * SIZE
803 addi BO, BO, 16 * SIZE
808 FMADD y01, a1, b1, y01
809 LFD a1, 2 * SIZE(AO1)
810 FMADD y02, a2, b1, y02
811 LFD a2, 2 * SIZE(AO2)
813 FMADD y03, a3, b1, y03
814 LFD a3, 2 * SIZE(AO3)
815 FMADD y04, a4, b1, y04
816 LFD a4, 2 * SIZE(AO4)
818 FMADD y05, a5, b1, y05
819 LFD a5, 2 * SIZE(AO5)
820 FMADD y06, a6, b1, y06
821 LFD a6, 2 * SIZE(AO6)
823 FMADD y07, a7, b1, y07
824 LFD a7, 2 * SIZE(AO7)
825 FMADD y08, a8, b1, y08
826 LFD a8, 2 * SIZE(AO8)
828 FMADD y09, a1, b2, y09
829 LFD a1, 3 * SIZE(AO1)
830 FMADD y10, a2, b2, y10
831 LFD a2, 3 * SIZE(AO2)
833 FMADD y11, a3, b2, y11
834 LFD a3, 3 * SIZE(AO3)
835 FMADD y12, a4, b2, y12
836 LFD a4, 3 * SIZE(AO4)
838 FMADD y13, a5, b2, y13
839 LFD a5, 3 * SIZE(AO5)
840 FMADD y14, a6, b2, y14
841 LFD a6, 3 * SIZE(AO6)
843 FMADD y15, a7, b2, y15
844 LFD a7, 3 * SIZE(AO7)
845 FMADD y16, a8, b2, y16
846 LFD a8, 3 * SIZE(AO8)
848 FMADD y01, a1, b3, y01
849 LFD a1, 4 * SIZE(AO1)
850 FMADD y02, a2, b3, y02
851 LFD a2, 4 * SIZE(AO2)
853 FMADD y03, a3, b3, y03
854 LFD a3, 4 * SIZE(AO3)
855 FMADD y04, a4, b3, y04
856 LFD a4, 4 * SIZE(AO4)
858 FMADD y05, a5, b3, y05
859 LFD a5, 4 * SIZE(AO5)
860 FMADD y06, a6, b3, y06
861 LFD a6, 4 * SIZE(AO6)
863 FMADD y07, a7, b3, y07
864 LFD a7, 4 * SIZE(AO7)
865 FMADD y08, a8, b3, y08
866 LFD a8, 4 * SIZE(AO8)
868 FMADD y09, a1, b4, y09
869 LFD a1, 5 * SIZE(AO1)
870 FMADD y10, a2, b4, y10
871 LFD a2, 5 * SIZE(AO2)
873 FMADD y11, a3, b4, y11
874 LFD a3, 5 * SIZE(AO3)
875 FMADD y12, a4, b4, y12
876 LFD a4, 5 * SIZE(AO4)
878 FMADD y13, a5, b4, y13
879 LFD a5, 5 * SIZE(AO5)
880 FMADD y14, a6, b4, y14
881 LFD a6, 5 * SIZE(AO6)
883 FMADD y15, a7, b4, y15
884 LFD a7, 5 * SIZE(AO7)
885 FMADD y16, a8, b4, y16
886 LFD a8, 5 * SIZE(AO8)
889 LFD b2, 10 * SIZE(BO)
890 LFD b3, 11 * SIZE(BO)
891 LFD b4, 12 * SIZE(BO)
893 FMADD y01, a1, b5, y01
894 LFD a1, 6 * SIZE(AO1)
895 FMADD y02, a2, b5, y02
896 LFD a2, 6 * SIZE(AO2)
898 FMADD y03, a3, b5, y03
899 LFD a3, 6 * SIZE(AO3)
900 FMADD y04, a4, b5, y04
901 LFD a4, 6 * SIZE(AO4)
903 FMADD y05, a5, b5, y05
904 LFD a5, 6 * SIZE(AO5)
905 FMADD y06, a6, b5, y06
906 LFD a6, 6 * SIZE(AO6)
908 FMADD y07, a7, b5, y07
909 LFD a7, 6 * SIZE(AO7)
910 FMADD y08, a8, b5, y08
911 LFD a8, 6 * SIZE(AO8)
913 FMADD y09, a1, b6, y09
914 LFD a1, 7 * SIZE(AO1)
915 FMADD y10, a2, b6, y10
916 LFD a2, 7 * SIZE(AO2)
918 FMADD y11, a3, b6, y11
919 LFD a3, 7 * SIZE(AO3)
920 FMADD y12, a4, b6, y12
921 LFD a4, 7 * SIZE(AO4)
923 FMADD y13, a5, b6, y13
924 LFD a5, 7 * SIZE(AO5)
925 FMADD y14, a6, b6, y14
926 LFD a6, 7 * SIZE(AO6)
928 FMADD y15, a7, b6, y15
929 LFD a7, 7 * SIZE(AO7)
930 FMADD y16, a8, b6, y16
931 LFD a8, 7 * SIZE(AO8)
933 FMADD y01, a1, b7, y01
934 LFD a1, 8 * SIZE(AO1)
935 FMADD y02, a2, b7, y02
936 LFD a2, 8 * SIZE(AO2)
938 FMADD y03, a3, b7, y03
939 LFD a3, 8 * SIZE(AO3)
940 FMADD y04, a4, b7, y04
941 LFD a4, 8 * SIZE(AO4)
943 FMADD y05, a5, b7, y05
944 LFD a5, 8 * SIZE(AO5)
945 FMADD y06, a6, b7, y06
946 LFD a6, 8 * SIZE(AO6)
948 FMADD y07, a7, b7, y07
949 LFD a7, 8 * SIZE(AO7)
950 FMADD y08, a8, b7, y08
951 LFD a8, 8 * SIZE(AO8)
953 FMADD y09, a1, b8, y09
954 LFD a1, 9 * SIZE(AO1)
955 FMADD y10, a2, b8, y10
956 LFD a2, 9 * SIZE(AO2)
958 FMADD y11, a3, b8, y11
959 LFD a3, 9 * SIZE(AO3)
960 FMADD y12, a4, b8, y12
961 LFD a4, 9 * SIZE(AO4)
963 FMADD y13, a5, b8, y13
964 LFD a5, 9 * SIZE(AO5)
965 FMADD y14, a6, b8, y14
966 LFD a6, 9 * SIZE(AO6)
968 FMADD y15, a7, b8, y15
969 LFD a7, 9 * SIZE(AO7)
970 FMADD y16, a8, b8, y16
971 LFD a8, 9 * SIZE(AO8)
973 LFD b5, 13 * SIZE(BO)
974 LFD b6, 14 * SIZE(BO)
975 LFD b7, 15 * SIZE(BO)
976 LFD b8, 16 * SIZE(BO)
978 FMADD y01, a1, b1, y01
979 LFD a1, 10 * SIZE(AO1)
980 FMADD y02, a2, b1, y02
981 LFD a2, 10 * SIZE(AO2)
983 FMADD y03, a3, b1, y03
984 LFD a3, 10 * SIZE(AO3)
985 FMADD y04, a4, b1, y04
986 LFD a4, 10 * SIZE(AO4)
988 FMADD y05, a5, b1, y05
989 LFD a5, 10 * SIZE(AO5)
990 FMADD y06, a6, b1, y06
991 LFD a6, 10 * SIZE(AO6)
993 FMADD y07, a7, b1, y07
994 LFD a7, 10 * SIZE(AO7)
995 FMADD y08, a8, b1, y08
996 LFD a8, 10 * SIZE(AO8)
998 FMADD y09, a1, b2, y09
999 LFD a1, 11 * SIZE(AO1)
1000 FMADD y10, a2, b2, y10
1001 LFD a2, 11 * SIZE(AO2)
1003 FMADD y11, a3, b2, y11
1004 LFD a3, 11 * SIZE(AO3)
1005 FMADD y12, a4, b2, y12
1006 LFD a4, 11 * SIZE(AO4)
1008 FMADD y13, a5, b2, y13
1009 LFD a5, 11 * SIZE(AO5)
1010 FMADD y14, a6, b2, y14
1011 LFD a6, 11 * SIZE(AO6)
1013 FMADD y15, a7, b2, y15
1014 LFD a7, 11 * SIZE(AO7)
1015 FMADD y16, a8, b2, y16
1016 LFD a8, 11 * SIZE(AO8)
1018 FMADD y01, a1, b3, y01
1019 LFD a1, 12 * SIZE(AO1)
1020 FMADD y02, a2, b3, y02
1021 LFD a2, 12 * SIZE(AO2)
1023 FMADD y03, a3, b3, y03
1024 LFD a3, 12 * SIZE(AO3)
1025 FMADD y04, a4, b3, y04
1026 LFD a4, 12 * SIZE(AO4)
1028 FMADD y05, a5, b3, y05
1029 LFD a5, 12 * SIZE(AO5)
1030 FMADD y06, a6, b3, y06
1031 LFD a6, 12 * SIZE(AO6)
1033 FMADD y07, a7, b3, y07
1034 LFD a7, 12 * SIZE(AO7)
1035 FMADD y08, a8, b3, y08
1036 LFD a8, 12 * SIZE(AO8)
1038 FMADD y09, a1, b4, y09
1039 LFD a1, 13 * SIZE(AO1)
1040 FMADD y10, a2, b4, y10
1041 LFD a2, 13 * SIZE(AO2)
1043 FMADD y11, a3, b4, y11
1044 LFD a3, 13 * SIZE(AO3)
1045 FMADD y12, a4, b4, y12
1046 LFD a4, 13 * SIZE(AO4)
1048 FMADD y13, a5, b4, y13
1049 LFD a5, 13 * SIZE(AO5)
1050 FMADD y14, a6, b4, y14
1051 LFD a6, 13 * SIZE(AO6)
1053 FMADD y15, a7, b4, y15
1054 LFD a7, 13 * SIZE(AO7)
1055 FMADD y16, a8, b4, y16
1056 LFD a8, 13 * SIZE(AO8)
1058 FMADD y01, a1, b5, y01
1059 LFD a1, 14 * SIZE(AO1)
1060 FMADD y02, a2, b5, y02
1061 LFD a2, 14 * SIZE(AO2)
1063 FMADD y03, a3, b5, y03
1064 LFD a3, 14 * SIZE(AO3)
1065 FMADD y04, a4, b5, y04
1066 LFD a4, 14 * SIZE(AO4)
1068 FMADD y05, a5, b5, y05
1069 LFD a5, 14 * SIZE(AO5)
1070 FMADD y06, a6, b5, y06
1071 LFD a6, 14 * SIZE(AO6)
1073 FMADD y07, a7, b5, y07
1074 LFD a7, 14 * SIZE(AO7)
1075 FMADD y08, a8, b5, y08
1076 LFD a8, 14 * SIZE(AO8)
1078 FMADD y09, a1, b6, y09
1079 LFD a1, 15 * SIZE(AO1)
1080 FMADD y10, a2, b6, y10
1081 LFD a2, 15 * SIZE(AO2)
1083 FMADD y11, a3, b6, y11
1084 LFD a3, 15 * SIZE(AO3)
1085 FMADD y12, a4, b6, y12
1086 LFD a4, 15 * SIZE(AO4)
1088 FMADD y13, a5, b6, y13
1089 LFD a5, 15 * SIZE(AO5)
1090 FMADD y14, a6, b6, y14
1091 LFD a6, 15 * SIZE(AO6)
1093 FMADD y15, a7, b6, y15
1094 LFD a7, 15 * SIZE(AO7)
1095 FMADD y16, a8, b6, y16
1096 LFD a8, 15 * SIZE(AO8)
1098 FMADD y01, a1, b7, y01
1099 LFD a1, 16 * SIZE(AO1)
1100 FMADD y02, a2, b7, y02
1101 LFD a2, 16 * SIZE(AO2)
1103 FMADD y03, a3, b7, y03
1104 LFD a3, 16 * SIZE(AO3)
1105 FMADD y04, a4, b7, y04
1106 LFD a4, 16 * SIZE(AO4)
1108 FMADD y05, a5, b7, y05
1109 LFD a5, 16 * SIZE(AO5)
1110 FMADD y06, a6, b7, y06
1111 LFD a6, 16 * SIZE(AO6)
1113 FMADD y07, a7, b7, y07
1114 LFD a7, 16 * SIZE(AO7)
1115 FMADD y08, a8, b7, y08
1116 LFD a8, 16 * SIZE(AO8)
1118 FMADD y09, a1, b8, y09
1119 FMADD y10, a2, b8, y10
1120 FMADD y11, a3, b8, y11
1121 FMADD y12, a4, b8, y12
1123 addi AO1, AO1, 16 * SIZE
1124 addi AO2, AO2, 16 * SIZE
1125 addi AO3, AO3, 16 * SIZE
1126 addi AO4, AO4, 16 * SIZE
1128 FMADD y13, a5, b8, y13
1129 FMADD y14, a6, b8, y14
1130 FMADD y15, a7, b8, y15
1131 FMADD y16, a8, b8, y16
1133 addi AO5, AO5, 16 * SIZE
1134 addi AO6, AO6, 16 * SIZE
1135 addi AO7, AO7, 16 * SIZE
1136 addi AO8, AO8, 16 * SIZE
1137 addi BO, BO, 16 * SIZE
1147 LFD a1, 1 * SIZE(AO1)
1148 LFD b1, 1 * SIZE(BO)
1149 LFD a2, 1 * SIZE(AO2)
1150 LFD a3, 1 * SIZE(AO3)
1151 LFD a4, 1 * SIZE(AO4)
1152 LFD a5, 1 * SIZE(AO5)
1153 LFD a6, 1 * SIZE(AO6)
1154 LFD a7, 1 * SIZE(AO7)
1155 LFD a8, 1 * SIZE(AO8)
1157 LFD b2, 2 * SIZE(BO)
1158 LFD b3, 3 * SIZE(BO)
1159 LFD b4, 4 * SIZE(BO)
1161 FMADD y01, a1, b1, y01
1162 LFD a1, 2 * SIZE(AO1)
1163 FMADD y02, a2, b1, y02
1164 LFD a2, 2 * SIZE(AO2)
1165 FMADD y03, a3, b1, y03
1166 LFD a3, 2 * SIZE(AO3)
1167 FMADD y04, a4, b1, y04
1168 LFD a4, 2 * SIZE(AO4)
1169 FMADD y05, a5, b1, y05
1170 LFD a5, 2 * SIZE(AO5)
1171 FMADD y06, a6, b1, y06
1172 LFD a6, 2 * SIZE(AO6)
1173 FMADD y07, a7, b1, y07
1174 LFD a7, 2 * SIZE(AO7)
1175 FMADD y08, a8, b1, y08
1176 LFD a8, 2 * SIZE(AO8)
1178 FMADD y09, a1, b2, y09
1179 LFD a1, 3 * SIZE(AO1)
1180 FMADD y10, a2, b2, y10
1181 LFD a2, 3 * SIZE(AO2)
1182 FMADD y11, a3, b2, y11
1183 LFD a3, 3 * SIZE(AO3)
1184 FMADD y12, a4, b2, y12
1185 LFD a4, 3 * SIZE(AO4)
1186 FMADD y13, a5, b2, y13
1187 LFD a5, 3 * SIZE(AO5)
1188 FMADD y14, a6, b2, y14
1189 LFD a6, 3 * SIZE(AO6)
1190 FMADD y15, a7, b2, y15
1191 LFD a7, 3 * SIZE(AO7)
1192 FMADD y16, a8, b2, y16
1193 LFD a8, 3 * SIZE(AO8)
1195 LFD b5, 5 * SIZE(BO)
1196 LFD b6, 6 * SIZE(BO)
1197 LFD b7, 7 * SIZE(BO)
1198 LFD b8, 8 * SIZE(BO)
1200 FMADD y01, a1, b3, y01
1201 LFD a1, 4 * SIZE(AO1)
1202 FMADD y02, a2, b3, y02
1203 LFD a2, 4 * SIZE(AO2)
1204 FMADD y03, a3, b3, y03
1205 LFD a3, 4 * SIZE(AO3)
1206 FMADD y04, a4, b3, y04
1207 LFD a4, 4 * SIZE(AO4)
1208 FMADD y05, a5, b3, y05
1209 LFD a5, 4 * SIZE(AO5)
1210 FMADD y06, a6, b3, y06
1211 LFD a6, 4 * SIZE(AO6)
1212 FMADD y07, a7, b3, y07
1213 LFD a7, 4 * SIZE(AO7)
1214 FMADD y08, a8, b3, y08
1215 LFD a8, 4 * SIZE(AO8)
1217 FMADD y09, a1, b4, y09
1218 LFD a1, 5 * SIZE(AO1)
1219 FMADD y10, a2, b4, y10
1220 LFD a2, 5 * SIZE(AO2)
1221 FMADD y11, a3, b4, y11
1222 LFD a3, 5 * SIZE(AO3)
1223 FMADD y12, a4, b4, y12
1224 LFD a4, 5 * SIZE(AO4)
1225 FMADD y13, a5, b4, y13
1226 LFD a5, 5 * SIZE(AO5)
1227 FMADD y14, a6, b4, y14
1228 LFD a6, 5 * SIZE(AO6)
1229 FMADD y15, a7, b4, y15
1230 LFD a7, 5 * SIZE(AO7)
1231 FMADD y16, a8, b4, y16
1232 LFD a8, 5 * SIZE(AO8)
1234 FMADD y01, a1, b5, y01
1235 LFD a1, 6 * SIZE(AO1)
1236 FMADD y02, a2, b5, y02
1237 LFD a2, 6 * SIZE(AO2)
1238 FMADD y03, a3, b5, y03
1239 LFD a3, 6 * SIZE(AO3)
1240 FMADD y04, a4, b5, y04
1241 LFD a4, 6 * SIZE(AO4)
1242 FMADD y05, a5, b5, y05
1243 LFD a5, 6 * SIZE(AO5)
1244 FMADD y06, a6, b5, y06
1245 LFD a6, 6 * SIZE(AO6)
1246 FMADD y07, a7, b5, y07
1247 LFD a7, 6 * SIZE(AO7)
1248 FMADD y08, a8, b5, y08
1249 LFD a8, 6 * SIZE(AO8)
1251 FMADD y09, a1, b6, y09
1252 LFD a1, 7 * SIZE(AO1)
1253 FMADD y10, a2, b6, y10
1254 LFD a2, 7 * SIZE(AO2)
1255 FMADD y11, a3, b6, y11
1256 LFD a3, 7 * SIZE(AO3)
1257 FMADD y12, a4, b6, y12
1258 LFD a4, 7 * SIZE(AO4)
1259 FMADD y13, a5, b6, y13
1260 LFD a5, 7 * SIZE(AO5)
1261 FMADD y14, a6, b6, y14
1262 LFD a6, 7 * SIZE(AO6)
1263 FMADD y15, a7, b6, y15
1264 LFD a7, 7 * SIZE(AO7)
1265 FMADD y16, a8, b6, y16
1266 LFD a8, 7 * SIZE(AO8)
1268 FMADD y01, a1, b7, y01
1269 LFD a1, 8 * SIZE(AO1)
1270 FMADD y02, a2, b7, y02
1271 LFD a2, 8 * SIZE(AO2)
1272 FMADD y03, a3, b7, y03
1273 LFD a3, 8 * SIZE(AO3)
1274 FMADD y04, a4, b7, y04
1275 LFD a4, 8 * SIZE(AO4)
1276 FMADD y05, a5, b7, y05
1277 LFD a5, 8 * SIZE(AO5)
1278 FMADD y06, a6, b7, y06
1279 LFD a6, 8 * SIZE(AO6)
1280 FMADD y07, a7, b7, y07
1281 LFD a7, 8 * SIZE(AO7)
1282 FMADD y08, a8, b7, y08
1283 LFD a8, 8 * SIZE(AO8)
1285 FMADD y09, a1, b8, y09
1286 addi AO1, AO1, 8 * SIZE
1287 FMADD y10, a2, b8, y10
1288 addi AO2, AO2, 8 * SIZE
1289 FMADD y11, a3, b8, y11
1290 addi AO3, AO3, 8 * SIZE
1291 FMADD y12, a4, b8, y12
1292 addi AO4, AO4, 8 * SIZE
1293 FMADD y13, a5, b8, y13
1294 addi AO5, AO5, 8 * SIZE
1295 FMADD y14, a6, b8, y14
1296 addi AO6, AO6, 8 * SIZE
1297 FMADD y15, a7, b8, y15
1298 addi AO7, AO7, 8 * SIZE
1299 FMADD y16, a8, b8, y16
1300 addi AO8, AO8, 8 * SIZE
1301 addi BO, BO, 8 * SIZE
1308 LFD a1, 1 * SIZE(AO1)
1309 LFD b1, 1 * SIZE(BO)
1310 LFD a2, 1 * SIZE(AO2)
1311 LFD a3, 1 * SIZE(AO3)
1312 LFD a4, 1 * SIZE(AO4)
1313 LFD a5, 1 * SIZE(AO5)
1314 LFD a6, 1 * SIZE(AO6)
1315 LFD a7, 1 * SIZE(AO7)
1316 LFD a8, 1 * SIZE(AO8)
1318 LFD b2, 2 * SIZE(BO)
1319 LFD b3, 3 * SIZE(BO)
1320 LFD b4, 4 * SIZE(BO)
1322 FMADD y01, a1, b1, y01
1323 LFD a1, 2 * SIZE(AO1)
1324 FMADD y02, a2, b1, y02
1325 LFD a2, 2 * SIZE(AO2)
1326 FMADD y03, a3, b1, y03
1327 LFD a3, 2 * SIZE(AO3)
1328 FMADD y04, a4, b1, y04
1329 LFD a4, 2 * SIZE(AO4)
1330 FMADD y05, a5, b1, y05
1331 LFD a5, 2 * SIZE(AO5)
1332 FMADD y06, a6, b1, y06
1333 LFD a6, 2 * SIZE(AO6)
1334 FMADD y07, a7, b1, y07
1335 LFD a7, 2 * SIZE(AO7)
1336 FMADD y08, a8, b1, y08
1337 LFD a8, 2 * SIZE(AO8)
1339 FMADD y09, a1, b2, y09
1340 LFD a1, 3 * SIZE(AO1)
1341 FMADD y10, a2, b2, y10
1342 LFD a2, 3 * SIZE(AO2)
1343 FMADD y11, a3, b2, y11
1344 LFD a3, 3 * SIZE(AO3)
1345 FMADD y12, a4, b2, y12
1346 LFD a4, 3 * SIZE(AO4)
1347 FMADD y13, a5, b2, y13
1348 LFD a5, 3 * SIZE(AO5)
1349 FMADD y14, a6, b2, y14
1350 LFD a6, 3 * SIZE(AO6)
1351 FMADD y15, a7, b2, y15
1352 LFD a7, 3 * SIZE(AO7)
1353 FMADD y16, a8, b2, y16
1354 LFD a8, 3 * SIZE(AO8)
1356 FMADD y01, a1, b3, y01
1357 LFD a1, 4 * SIZE(AO1)
1358 FMADD y02, a2, b3, y02
1359 LFD a2, 4 * SIZE(AO2)
1360 FMADD y03, a3, b3, y03
1361 LFD a3, 4 * SIZE(AO3)
1362 FMADD y04, a4, b3, y04
1363 LFD a4, 4 * SIZE(AO4)
1365 FMADD y05, a5, b3, y05
1366 LFD a5, 4 * SIZE(AO5)
1367 FMADD y06, a6, b3, y06
1368 LFD a6, 4 * SIZE(AO6)
1369 FMADD y07, a7, b3, y07
1370 LFD a7, 4 * SIZE(AO7)
1371 FMADD y08, a8, b3, y08
1372 LFD a8, 4 * SIZE(AO8)
1374 FMADD y09, a1, b4, y09
1375 addi AO1, AO1, 4 * SIZE
1376 FMADD y10, a2, b4, y10
1377 addi AO2, AO2, 4 * SIZE
1378 FMADD y11, a3, b4, y11
1379 addi AO3, AO3, 4 * SIZE
1380 FMADD y12, a4, b4, y12
1381 addi AO4, AO4, 4 * SIZE
1382 FMADD y13, a5, b4, y13
1383 addi AO5, AO5, 4 * SIZE
1384 FMADD y14, a6, b4, y14
1385 addi AO6, AO6, 4 * SIZE
1386 FMADD y15, a7, b4, y15
1387 addi AO7, AO7, 4 * SIZE
1388 FMADD y16, a8, b4, y16
1389 addi AO8, AO8, 4 * SIZE
1390 addi BO, BO, 4 * SIZE
1397 LFD a1, 1 * SIZE(AO1)
1398 LFD b1, 1 * SIZE(BO)
1399 LFD a2, 1 * SIZE(AO2)
1400 LFD a3, 1 * SIZE(AO3)
1401 LFD a4, 1 * SIZE(AO4)
1402 LFD a5, 1 * SIZE(AO5)
1403 LFD a6, 1 * SIZE(AO6)
1404 LFD a7, 1 * SIZE(AO7)
1405 LFD a8, 1 * SIZE(AO8)
1407 LFD b2, 2 * SIZE(BO)
1409 FMADD y01, a1, b1, y01
1410 LFD a1, 2 * SIZE(AO1)
1411 FMADD y02, a2, b1, y02
1412 LFD a2, 2 * SIZE(AO2)
1413 FMADD y03, a3, b1, y03
1414 LFD a3, 2 * SIZE(AO3)
1415 FMADD y04, a4, b1, y04
1416 LFD a4, 2 * SIZE(AO4)
1417 FMADD y05, a5, b1, y05
1418 LFD a5, 2 * SIZE(AO5)
1419 FMADD y06, a6, b1, y06
1420 LFD a6, 2 * SIZE(AO6)
1421 FMADD y07, a7, b1, y07
1422 LFD a7, 2 * SIZE(AO7)
1423 FMADD y08, a8, b1, y08
1424 LFD a8, 2 * SIZE(AO8)
1426 FMADD y09, a1, b2, y09
1427 addi AO1, AO1, 2 * SIZE
1428 addi AO2, AO2, 2 * SIZE
1429 FMADD y10, a2, b2, y10
1430 addi AO3, AO3, 2 * SIZE
1431 addi AO4, AO4, 2 * SIZE
1432 FMADD y11, a3, b2, y11
1433 FMADD y12, a4, b2, y12
1434 addi AO5, AO5, 2 * SIZE
1435 addi AO6, AO6, 2 * SIZE
1436 FMADD y13, a5, b2, y13
1437 FMADD y14, a6, b2, y14
1438 addi AO7, AO7, 2 * SIZE
1439 addi AO8, AO8, 2 * SIZE
1440 FMADD y15, a7, b2, y15
1441 FMADD y16, a8, b2, y16
1442 addi BO, BO, 2 * SIZE
1449 LFD a1, 1 * SIZE(AO1)
1450 LFD b1, 1 * SIZE(BO)
1451 LFD a2, 1 * SIZE(AO2)
1452 LFD a3, 1 * SIZE(AO3)
1453 LFD a4, 1 * SIZE(AO4)
1454 LFD a5, 1 * SIZE(AO5)
1455 LFD a6, 1 * SIZE(AO6)
1456 LFD a7, 1 * SIZE(AO7)
1457 LFD a8, 1 * SIZE(AO8)
1459 FMADD y01, a1, b1, y01
1460 FMADD y02, a2, b1, y02
1461 FMADD y03, a3, b1, y03
1462 FMADD y04, a4, b1, y04
1463 FMADD y05, a5, b1, y05
1464 FMADD y06, a6, b1, y06
1465 FMADD y07, a7, b1, y07
1466 FMADD y08, a8, b1, y08
1472 cmpi cr0, 0, INCY, SIZE
1475 LFD a1, 1 * SIZE(CO)
1476 LFD a2, 2 * SIZE(CO)
1477 LFD a3, 3 * SIZE(CO)
1478 LFD a4, 4 * SIZE(CO)
1479 LFD a5, 5 * SIZE(CO)
1480 LFD a6, 6 * SIZE(CO)
1481 LFD a7, 7 * SIZE(CO)
1482 LFD a8, 8 * SIZE(CO)
1493 FMADD a1, alpha, y01, a1
1494 FMADD a2, alpha, y02, a2
1495 FMADD a3, alpha, y03, a3
1496 FMADD a4, alpha, y04, a4
1497 FMADD a5, alpha, y05, a5
1498 FMADD a6, alpha, y06, a6
1499 FMADD a7, alpha, y07, a7
1500 FMADD a8, alpha, y08, a8
1502 STFD a1, 1 * SIZE(CO)
1503 STFD a2, 2 * SIZE(CO)
1504 STFD a3, 3 * SIZE(CO)
1505 STFD a4, 4 * SIZE(CO)
1506 STFD a5, 5 * SIZE(CO)
1507 STFD a6, 6 * SIZE(CO)
1508 STFD a7, 7 * SIZE(CO)
1509 STFD a8, 8 * SIZE(CO)
1512 addi CO, CO, 8 * SIZE
1537 FMADD a1, alpha, f0, a1
1538 FMADD a2, alpha, f1, a2
1539 FMADD a3, alpha, f2, a3
1540 FMADD a4, alpha, f3, a4
1541 FMADD a5, alpha, f4, a5
1542 FMADD a6, alpha, f5, a6
1543 FMADD a7, alpha, f6, a7
1544 FMADD a8, alpha, f7, a8
1589 LFD a1, 1 * SIZE(AO1)
1590 LFD a2, 1 * SIZE(AO2)
1591 LFD a3, 1 * SIZE(AO3)
1592 LFD a4, 1 * SIZE(AO4)
1593 LFD a5, 2 * SIZE(AO1)
1594 LFD a6, 2 * SIZE(AO2)
1595 LFD a7, 2 * SIZE(AO3)
1596 LFD a8, 2 * SIZE(AO4)
1598 LFD b1, 1 * SIZE(BO)
1599 LFD b2, 2 * SIZE(BO)
1600 LFD b3, 3 * SIZE(BO)
1601 LFD b4, 4 * SIZE(BO)
1602 LFD b5, 5 * SIZE(BO)
1603 LFD b6, 6 * SIZE(BO)
1604 LFD b7, 7 * SIZE(BO)
1605 LFD b8, 8 * SIZE(BO)
1610 FMADD y01, a1, b1, y01
1611 LFD a1, 3 * SIZE(AO1)
1612 FMADD y02, a2, b1, y02
1613 LFD a2, 3 * SIZE(AO2)
1614 FMADD y03, a3, b1, y03
1615 LFD a3, 3 * SIZE(AO3)
1616 FMADD y04, a4, b1, y04
1617 LFD a4, 3 * SIZE(AO4)
1619 FMADD y09, a5, b2, y09
1620 LFD a5, 4 * SIZE(AO1)
1621 FMADD y10, a6, b2, y10
1622 LFD a6, 4 * SIZE(AO2)
1623 FMADD y11, a7, b2, y11
1624 LFD a7, 4 * SIZE(AO3)
1625 FMADD y12, a8, b2, y12
1626 LFD a8, 4 * SIZE(AO4)
1628 FMADD y01, a1, b3, y01
1629 LFD a1, 5 * SIZE(AO1)
1630 FMADD y02, a2, b3, y02
1631 LFD a2, 5 * SIZE(AO2)
1632 FMADD y03, a3, b3, y03
1633 LFD a3, 5 * SIZE(AO3)
1634 FMADD y04, a4, b3, y04
1635 LFD a4, 5 * SIZE(AO4)
1637 FMADD y09, a5, b4, y09
1638 LFD a5, 6 * SIZE(AO1)
1639 FMADD y10, a6, b4, y10
1640 LFD a6, 6 * SIZE(AO2)
1641 FMADD y11, a7, b4, y11
1642 LFD a7, 6 * SIZE(AO3)
1643 FMADD y12, a8, b4, y12
1644 LFD a8, 6 * SIZE(AO4)
1646 LFD b1, 9 * SIZE(BO)
1647 LFD b2, 10 * SIZE(BO)
1648 LFD b3, 11 * SIZE(BO)
1649 LFD b4, 12 * SIZE(BO)
1651 FMADD y01, a1, b5, y01
1652 LFD a1, 7 * SIZE(AO1)
1653 FMADD y02, a2, b5, y02
1654 LFD a2, 7 * SIZE(AO2)
1655 FMADD y03, a3, b5, y03
1656 LFD a3, 7 * SIZE(AO3)
1657 FMADD y04, a4, b5, y04
1658 LFD a4, 7 * SIZE(AO4)
1660 FMADD y09, a5, b6, y09
1661 LFD a5, 8 * SIZE(AO1)
1662 FMADD y10, a6, b6, y10
1663 LFD a6, 8 * SIZE(AO2)
1664 FMADD y11, a7, b6, y11
1665 LFD a7, 8 * SIZE(AO3)
1666 FMADD y12, a8, b6, y12
1667 LFD a8, 8 * SIZE(AO4)
1669 FMADD y01, a1, b7, y01
1670 LFD a1, 9 * SIZE(AO1)
1671 FMADD y02, a2, b7, y02
1672 LFD a2, 9 * SIZE(AO2)
1673 FMADD y03, a3, b7, y03
1674 LFD a3, 9 * SIZE(AO3)
1675 FMADD y04, a4, b7, y04
1676 LFD a4, 9 * SIZE(AO4)
1678 FMADD y09, a5, b8, y09
1679 LFD a5, 10 * SIZE(AO1)
1680 FMADD y10, a6, b8, y10
1681 LFD a6, 10 * SIZE(AO2)
1682 FMADD y11, a7, b8, y11
1683 LFD a7, 10 * SIZE(AO3)
1684 FMADD y12, a8, b8, y12
1685 LFD a8, 10 * SIZE(AO4)
1687 LFD b5, 13 * SIZE(BO)
1688 LFD b6, 14 * SIZE(BO)
1689 LFD b7, 15 * SIZE(BO)
1690 LFD b8, 16 * SIZE(BO)
1692 FMADD y01, a1, b1, y01
1693 LFD a1, 11 * SIZE(AO1)
1694 FMADD y02, a2, b1, y02
1695 LFD a2, 11 * SIZE(AO2)
1696 FMADD y03, a3, b1, y03
1697 LFD a3, 11 * SIZE(AO3)
1698 FMADD y04, a4, b1, y04
1699 LFD a4, 11 * SIZE(AO4)
1701 FMADD y09, a5, b2, y09
1702 LFD a5, 12 * SIZE(AO1)
1703 FMADD y10, a6, b2, y10
1704 LFD a6, 12 * SIZE(AO2)
1705 FMADD y11, a7, b2, y11
1706 LFD a7, 12 * SIZE(AO3)
1707 FMADD y12, a8, b2, y12
1708 LFD a8, 12 * SIZE(AO4)
1710 FMADD y01, a1, b3, y01
1711 LFD a1, 13 * SIZE(AO1)
1712 FMADD y02, a2, b3, y02
1713 LFD a2, 13 * SIZE(AO2)
1714 FMADD y03, a3, b3, y03
1715 LFD a3, 13 * SIZE(AO3)
1716 FMADD y04, a4, b3, y04
1717 LFD a4, 13 * SIZE(AO4)
1719 FMADD y09, a5, b4, y09
1720 LFD a5, 14 * SIZE(AO1)
1721 FMADD y10, a6, b4, y10
1722 LFD a6, 14 * SIZE(AO2)
1723 FMADD y11, a7, b4, y11
1724 LFD a7, 14 * SIZE(AO3)
1725 FMADD y12, a8, b4, y12
1726 LFD a8, 14 * SIZE(AO4)
1728 LFD b1, 17 * SIZE(BO)
1729 LFD b2, 18 * SIZE(BO)
1730 LFD b3, 19 * SIZE(BO)
1731 LFD b4, 20 * SIZE(BO)
1733 FMADD y01, a1, b5, y01
1734 LFD a1, 15 * SIZE(AO1)
1735 FMADD y02, a2, b5, y02
1736 LFD a2, 15 * SIZE(AO2)
1737 FMADD y03, a3, b5, y03
1738 LFD a3, 15 * SIZE(AO3)
1739 FMADD y04, a4, b5, y04
1740 LFD a4, 15 * SIZE(AO4)
1742 FMADD y09, a5, b6, y09
1743 LFD a5, 16 * SIZE(AO1)
1744 FMADD y10, a6, b6, y10
1745 LFD a6, 16 * SIZE(AO2)
1746 FMADD y11, a7, b6, y11
1747 LFD a7, 16 * SIZE(AO3)
1748 FMADD y12, a8, b6, y12
1749 LFD a8, 16 * SIZE(AO4)
1751 FMADD y01, a1, b7, y01
1752 LFD a1, 17 * SIZE(AO1)
1753 FMADD y02, a2, b7, y02
1754 LFD a2, 17 * SIZE(AO2)
1755 FMADD y03, a3, b7, y03
1756 LFD a3, 17 * SIZE(AO3)
1757 FMADD y04, a4, b7, y04
1758 LFD a4, 17 * SIZE(AO4)
1760 FMADD y09, a5, b8, y09
1761 LFD a5, 18 * SIZE(AO1)
1762 FMADD y10, a6, b8, y10
1763 LFD a6, 18 * SIZE(AO2)
1764 FMADD y11, a7, b8, y11
1765 LFD a7, 18 * SIZE(AO3)
1766 FMADD y12, a8, b8, y12
1767 LFD a8, 18 * SIZE(AO4)
1769 LFD b5, 21 * SIZE(BO)
1770 LFD b6, 22 * SIZE(BO)
1771 LFD b7, 23 * SIZE(BO)
1772 LFD b8, 24 * SIZE(BO)
1774 addi AO1, AO1, 16 * SIZE
1775 addi AO2, AO2, 16 * SIZE
1779 addi AO3, AO3, 16 * SIZE
1780 addi AO4, AO4, 16 * SIZE
1784 addi BO, BO, 16 * SIZE
/* Four-column block: 16-element pass with no look-ahead. */
/* a1..a8 and b1..b8 are consumed before this block reloads them, so they */
/* must already be live on entry (their initial loads are not in this block). */
/* Accumulator layout used below: */
/*   y01..y04 += element of AO1..AO4 at odd offsets  (1,3,...,15) * b */
/*   y09..y12 += element of AO1..AO4 at even offsets (2,4,...,16) * b */
/* FMADD d, x, y, z is presumably the fused multiply-add macro (d = x*y + z); */
/* its definition is outside this excerpt. */
/* NOTE(review): presumably the final pass of the unrolled loop; the branch */
/* that selects it is not visible here. */
1789 FMADD y01, a1, b1, y01
1790 LFD a1, 3 * SIZE(AO1)
1791 FMADD y02, a2, b1, y02
1792 LFD a2, 3 * SIZE(AO2)
1793 FMADD y03, a3, b1, y03
1794 LFD a3, 3 * SIZE(AO3)
1795 FMADD y04, a4, b1, y04
1796 LFD a4, 3 * SIZE(AO4)
1798 FMADD y09, a5, b2, y09
1799 LFD a5, 4 * SIZE(AO1)
1800 FMADD y10, a6, b2, y10
1801 LFD a6, 4 * SIZE(AO2)
1802 FMADD y11, a7, b2, y11
1803 LFD a7, 4 * SIZE(AO3)
1804 FMADD y12, a8, b2, y12
1805 LFD a8, 4 * SIZE(AO4)
1807 FMADD y01, a1, b3, y01
1808 LFD a1, 5 * SIZE(AO1)
1809 FMADD y02, a2, b3, y02
1810 LFD a2, 5 * SIZE(AO2)
1811 FMADD y03, a3, b3, y03
1812 LFD a3, 5 * SIZE(AO3)
1813 FMADD y04, a4, b3, y04
1814 LFD a4, 5 * SIZE(AO4)
1816 FMADD y09, a5, b4, y09
1817 LFD a5, 6 * SIZE(AO1)
1818 FMADD y10, a6, b4, y10
1819 LFD a6, 6 * SIZE(AO2)
1820 FMADD y11, a7, b4, y11
1821 LFD a7, 6 * SIZE(AO3)
1822 FMADD y12, a8, b4, y12
1823 LFD a8, 6 * SIZE(AO4)
/* b1..b4 have had their last use above; refill them with BO offsets 9..12. */
1825 LFD b1, 9 * SIZE(BO)
1826 LFD b2, 10 * SIZE(BO)
1827 LFD b3, 11 * SIZE(BO)
1828 LFD b4, 12 * SIZE(BO)
1830 FMADD y01, a1, b5, y01
1831 LFD a1, 7 * SIZE(AO1)
1832 FMADD y02, a2, b5, y02
1833 LFD a2, 7 * SIZE(AO2)
1834 FMADD y03, a3, b5, y03
1835 LFD a3, 7 * SIZE(AO3)
1836 FMADD y04, a4, b5, y04
1837 LFD a4, 7 * SIZE(AO4)
1839 FMADD y09, a5, b6, y09
1840 LFD a5, 8 * SIZE(AO1)
1841 FMADD y10, a6, b6, y10
1842 LFD a6, 8 * SIZE(AO2)
1843 FMADD y11, a7, b6, y11
1844 LFD a7, 8 * SIZE(AO3)
1845 FMADD y12, a8, b6, y12
1846 LFD a8, 8 * SIZE(AO4)
1848 FMADD y01, a1, b7, y01
1849 LFD a1, 9 * SIZE(AO1)
1850 FMADD y02, a2, b7, y02
1851 LFD a2, 9 * SIZE(AO2)
1852 FMADD y03, a3, b7, y03
1853 LFD a3, 9 * SIZE(AO3)
1854 FMADD y04, a4, b7, y04
1855 LFD a4, 9 * SIZE(AO4)
1857 FMADD y09, a5, b8, y09
1858 LFD a5, 10 * SIZE(AO1)
1859 FMADD y10, a6, b8, y10
1860 LFD a6, 10 * SIZE(AO2)
1861 FMADD y11, a7, b8, y11
1862 LFD a7, 10 * SIZE(AO3)
1863 FMADD y12, a8, b8, y12
1864 LFD a8, 10 * SIZE(AO4)
/* b5..b8 are free now; refill them with BO offsets 13..16 (the last b loads of this pass). */
1866 LFD b5, 13 * SIZE(BO)
1867 LFD b6, 14 * SIZE(BO)
1868 LFD b7, 15 * SIZE(BO)
1869 LFD b8, 16 * SIZE(BO)
1871 FMADD y01, a1, b1, y01
1872 LFD a1, 11 * SIZE(AO1)
1873 FMADD y02, a2, b1, y02
1874 LFD a2, 11 * SIZE(AO2)
1875 FMADD y03, a3, b1, y03
1876 LFD a3, 11 * SIZE(AO3)
1877 FMADD y04, a4, b1, y04
1878 LFD a4, 11 * SIZE(AO4)
1880 FMADD y09, a5, b2, y09
1881 LFD a5, 12 * SIZE(AO1)
1882 FMADD y10, a6, b2, y10
1883 LFD a6, 12 * SIZE(AO2)
1884 FMADD y11, a7, b2, y11
1885 LFD a7, 12 * SIZE(AO3)
1886 FMADD y12, a8, b2, y12
1887 LFD a8, 12 * SIZE(AO4)
1889 FMADD y01, a1, b3, y01
1890 LFD a1, 13 * SIZE(AO1)
1891 FMADD y02, a2, b3, y02
1892 LFD a2, 13 * SIZE(AO2)
1893 FMADD y03, a3, b3, y03
1894 LFD a3, 13 * SIZE(AO3)
1895 FMADD y04, a4, b3, y04
1896 LFD a4, 13 * SIZE(AO4)
1898 FMADD y09, a5, b4, y09
1899 LFD a5, 14 * SIZE(AO1)
1900 FMADD y10, a6, b4, y10
1901 LFD a6, 14 * SIZE(AO2)
1902 FMADD y11, a7, b4, y11
1903 LFD a7, 14 * SIZE(AO3)
1904 FMADD y12, a8, b4, y12
1905 LFD a8, 14 * SIZE(AO4)
1907 FMADD y01, a1, b5, y01
1908 LFD a1, 15 * SIZE(AO1)
1909 FMADD y02, a2, b5, y02
1910 LFD a2, 15 * SIZE(AO2)
1911 FMADD y03, a3, b5, y03
1912 LFD a3, 15 * SIZE(AO3)
1913 FMADD y04, a4, b5, y04
1914 LFD a4, 15 * SIZE(AO4)
1916 FMADD y09, a5, b6, y09
1917 LFD a5, 16 * SIZE(AO1)
1918 FMADD y10, a6, b6, y10
1919 LFD a6, 16 * SIZE(AO2)
1920 FMADD y11, a7, b6, y11
1921 LFD a7, 16 * SIZE(AO3)
1922 FMADD y12, a8, b6, y12
1923 LFD a8, 16 * SIZE(AO4)
/* Last two elements (offsets 15 and 16): multiply only, nothing is loaded past offset 16. */
1925 FMADD y01, a1, b7, y01
1926 FMADD y02, a2, b7, y02
1927 FMADD y03, a3, b7, y03
1928 FMADD y04, a4, b7, y04
1930 FMADD y09, a5, b8, y09
1931 FMADD y10, a6, b8, y10
1932 FMADD y11, a7, b8, y11
1933 FMADD y12, a8, b8, y12
/* Step all four column pointers and the BO pointer past the 16 elements just consumed. */
1935 addi AO1, AO1, 16 * SIZE
1936 addi AO2, AO2, 16 * SIZE
1937 addi AO3, AO3, 16 * SIZE
1938 addi AO4, AO4, 16 * SIZE
1939 addi BO, BO, 16 * SIZE
/* Four-column block: 8-element step. */
/* Self-contained: loads every operand it uses (offsets 1..8 of AO1..AO4 and */
/* of BO), accumulates odd offsets into y01..y04 and even offsets into */
/* y09..y12, then advances AO1..AO4 and BO by 8 * SIZE. */
/* NOTE(review): presumably a remainder step guarded by a test on the */
/* remaining element count; that test/branch is not visible here. */
1949 LFD a1, 1 * SIZE(AO1)
1950 LFD a2, 1 * SIZE(AO2)
1951 LFD a3, 1 * SIZE(AO3)
1952 LFD a4, 1 * SIZE(AO4)
1954 LFD b1, 1 * SIZE(BO)
1955 LFD b2, 2 * SIZE(BO)
1956 LFD b3, 3 * SIZE(BO)
1957 LFD b4, 4 * SIZE(BO)
1959 LFD a5, 2 * SIZE(AO1)
1960 LFD a6, 2 * SIZE(AO2)
1961 LFD a7, 2 * SIZE(AO3)
1962 LFD a8, 2 * SIZE(AO4)
/* Each FMADD is paired with the load of the same a-register two elements ahead. */
1964 FMADD y01, a1, b1, y01
1965 LFD a1, 3 * SIZE(AO1)
1966 FMADD y02, a2, b1, y02
1967 LFD a2, 3 * SIZE(AO2)
1968 FMADD y03, a3, b1, y03
1969 LFD a3, 3 * SIZE(AO3)
1970 FMADD y04, a4, b1, y04
1971 LFD a4, 3 * SIZE(AO4)
1973 FMADD y09, a5, b2, y09
1974 LFD a5, 4 * SIZE(AO1)
1975 FMADD y10, a6, b2, y10
1976 LFD a6, 4 * SIZE(AO2)
1977 FMADD y11, a7, b2, y11
1978 LFD a7, 4 * SIZE(AO3)
1979 FMADD y12, a8, b2, y12
1980 LFD a8, 4 * SIZE(AO4)
1982 FMADD y01, a1, b3, y01
1983 LFD a1, 5 * SIZE(AO1)
1984 FMADD y02, a2, b3, y02
1985 LFD a2, 5 * SIZE(AO2)
1986 FMADD y03, a3, b3, y03
1987 LFD a3, 5 * SIZE(AO3)
1988 FMADD y04, a4, b3, y04
1989 LFD a4, 5 * SIZE(AO4)
1991 FMADD y09, a5, b4, y09
1992 LFD a5, 6 * SIZE(AO1)
1993 FMADD y10, a6, b4, y10
1994 LFD a6, 6 * SIZE(AO2)
1995 FMADD y11, a7, b4, y11
1996 LFD a7, 6 * SIZE(AO3)
1997 FMADD y12, a8, b4, y12
1998 LFD a8, 6 * SIZE(AO4)
/* This step uses only b1..b4: reuse them for BO offsets 5..8 (second half). */
2000 LFD b1, 5 * SIZE(BO)
2001 LFD b2, 6 * SIZE(BO)
2002 LFD b3, 7 * SIZE(BO)
2003 LFD b4, 8 * SIZE(BO)
2005 FMADD y01, a1, b1, y01
2006 LFD a1, 7 * SIZE(AO1)
2007 FMADD y02, a2, b1, y02
2008 LFD a2, 7 * SIZE(AO2)
2009 FMADD y03, a3, b1, y03
2010 LFD a3, 7 * SIZE(AO3)
2011 FMADD y04, a4, b1, y04
2012 LFD a4, 7 * SIZE(AO4)
2014 FMADD y09, a5, b2, y09
2015 LFD a5, 8 * SIZE(AO1)
2016 FMADD y10, a6, b2, y10
2017 LFD a6, 8 * SIZE(AO2)
2018 FMADD y11, a7, b2, y11
2019 LFD a7, 8 * SIZE(AO3)
2020 FMADD y12, a8, b2, y12
2021 LFD a8, 8 * SIZE(AO4)
2023 FMADD y01, a1, b3, y01
2024 FMADD y02, a2, b3, y02
2025 FMADD y03, a3, b3, y03
2026 FMADD y04, a4, b3, y04
/* Pointer updates are interleaved with the last FMADDs; all a/b values are already in registers. */
2028 FMADD y09, a5, b4, y09
2029 addi AO1, AO1, 8 * SIZE
2030 FMADD y10, a6, b4, y10
2031 addi AO2, AO2, 8 * SIZE
2032 FMADD y11, a7, b4, y11
2033 addi AO3, AO3, 8 * SIZE
2034 FMADD y12, a8, b4, y12
2035 addi AO4, AO4, 8 * SIZE
2037 addi BO, BO, 8 * SIZE
/* Four-column block: 4-element step. */
/* Loads offsets 1..4 of AO1..AO4 and of BO, accumulates odd offsets into */
/* y01..y04 and even offsets into y09..y12, then advances all five pointers */
/* by 4 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2044 LFD a1, 1 * SIZE(AO1)
2045 LFD a2, 1 * SIZE(AO2)
2046 LFD a3, 1 * SIZE(AO3)
2047 LFD a4, 1 * SIZE(AO4)
2049 LFD b1, 1 * SIZE(BO)
2050 LFD b2, 2 * SIZE(BO)
2051 LFD b3, 3 * SIZE(BO)
2052 LFD b4, 4 * SIZE(BO)
2054 LFD a5, 2 * SIZE(AO1)
2055 LFD a6, 2 * SIZE(AO2)
2056 LFD a7, 2 * SIZE(AO3)
2057 LFD a8, 2 * SIZE(AO4)
/* Elements 1 and 2; a1..a8 are refilled with elements 3 and 4 as they are consumed. */
2059 FMADD y01, a1, b1, y01
2060 LFD a1, 3 * SIZE(AO1)
2061 FMADD y02, a2, b1, y02
2062 LFD a2, 3 * SIZE(AO2)
2063 FMADD y03, a3, b1, y03
2064 LFD a3, 3 * SIZE(AO3)
2065 FMADD y04, a4, b1, y04
2066 LFD a4, 3 * SIZE(AO4)
2068 FMADD y09, a5, b2, y09
2069 LFD a5, 4 * SIZE(AO1)
2070 FMADD y10, a6, b2, y10
2071 LFD a6, 4 * SIZE(AO2)
2072 FMADD y11, a7, b2, y11
2073 LFD a7, 4 * SIZE(AO3)
2074 FMADD y12, a8, b2, y12
2075 LFD a8, 4 * SIZE(AO4)
/* Elements 3 and 4. */
2077 FMADD y01, a1, b3, y01
2078 FMADD y02, a2, b3, y02
2079 FMADD y03, a3, b3, y03
2080 FMADD y04, a4, b3, y04
2082 FMADD y09, a5, b4, y09
2083 addi AO1, AO1, 4 * SIZE
2084 FMADD y10, a6, b4, y10
2085 addi AO2, AO2, 4 * SIZE
2086 FMADD y11, a7, b4, y11
2087 addi AO3, AO3, 4 * SIZE
2088 FMADD y12, a8, b4, y12
2089 addi AO4, AO4, 4 * SIZE
2090 addi BO, BO, 4 * SIZE
/* Four-column block: 2-element step. */
/* Element 1 of AO1..AO4 (a1..a4) times b1 goes into y01..y04; element 2 */
/* (a5..a8) times b2 goes into y09..y12. All five pointers advance by 2 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2097 LFD a1, 1 * SIZE(AO1)
2098 LFD a2, 1 * SIZE(AO2)
2099 LFD b1, 1 * SIZE(BO)
2100 LFD b2, 2 * SIZE(BO)
2102 LFD a3, 1 * SIZE(AO3)
2103 LFD a4, 1 * SIZE(AO4)
2105 LFD a5, 2 * SIZE(AO1)
2106 LFD a6, 2 * SIZE(AO2)
2107 LFD a7, 2 * SIZE(AO3)
2108 LFD a8, 2 * SIZE(AO4)
2110 FMADD y01, a1, b1, y01
2111 FMADD y02, a2, b1, y02
2112 FMADD y03, a3, b1, y03
2113 FMADD y04, a4, b1, y04
2115 FMADD y09, a5, b2, y09
2116 addi AO1, AO1, 2 * SIZE
2117 FMADD y10, a6, b2, y10
2118 addi AO2, AO2, 2 * SIZE
2119 FMADD y11, a7, b2, y11
2120 addi AO3, AO3, 2 * SIZE
2121 FMADD y12, a8, b2, y12
2122 addi AO4, AO4, 2 * SIZE
2123 addi BO, BO, 2 * SIZE
/* Four-column block: single-element step. */
/* Multiplies element 1 of each of AO1..AO4 by element 1 of BO and adds the */
/* products to y01..y04. No pointer is advanced in the visible lines. */
/* NOTE(review): presumably the last remainder step for this column group; confirm against the hidden branches. */
2130 LFD a1, 1 * SIZE(AO1)
2131 LFD b1, 1 * SIZE(BO)
2133 LFD a2, 1 * SIZE(AO2)
2134 LFD a3, 1 * SIZE(AO3)
2135 LFD a4, 1 * SIZE(AO4)
2137 FMADD y01, a1, b1, y01
2138 FMADD y02, a2, b1, y02
2139 FMADD y03, a3, b1, y03
2140 FMADD y04, a4, b1, y04
/* Four-column block: fold the accumulators into the four result elements at CO. */
/* The compare of INCY against SIZE sets cr0; the branch that consumes it is */
/* not visible here. NOTE(review): presumably it selects between the */
/* contiguous path (first group below) and a strided path (last group). */
/* NOTE(review): y09..y12 are not read in the visible lines; presumably they */
/* are added into y01..y04 by instructions outside this excerpt - confirm. */
2146 cmpi cr0, 0, INCY, SIZE
/* Load the current four result values from offsets 1..4 of CO. */
2149 LFD a1, 1 * SIZE(CO)
2150 LFD a2, 2 * SIZE(CO)
2151 LFD a3, 3 * SIZE(CO)
2152 LFD a4, 4 * SIZE(CO)
/* result = alpha * accumulator + result */
2159 FMADD a1, alpha, y01, a1
2160 FMADD a2, alpha, y02, a2
2161 FMADD a3, alpha, y03, a3
2162 FMADD a4, alpha, y04, a4
2164 STFD a1, 1 * SIZE(CO)
2165 STFD a2, 2 * SIZE(CO)
2166 STFD a3, 3 * SIZE(CO)
2167 STFD a4, 4 * SIZE(CO)
2169 addi CO, CO, 4 * SIZE
/* Second form of the same update, reading f0..f3 instead of y01..y04. */
/* NOTE(review): f0..f3 are presumably the registers behind y01..y04, and the */
/* loads/stores belonging to this path are not visible here - confirm. */
2184 FMADD a1, alpha, f0, a1
2185 FMADD a2, alpha, f1, a2
2186 FMADD a3, alpha, f2, a3
2187 FMADD a4, alpha, f3, a4
/* Two-column block: operand preload. */
/* Odd-numbered a-registers (a1,a3,a5,a7) take elements 1..4 of AO1; */
/* even-numbered ones (a2,a4,a6,a8) take elements 1..4 of AO2. */
/* b1..b8 take elements 1..8 of BO. */
/* NOTE(review): presumably feeds the unrolled two-column loop that follows; the loop setup is not visible here. */
2220 LFD a1, 1 * SIZE(AO1)
2221 LFD a2, 1 * SIZE(AO2)
2222 LFD a3, 2 * SIZE(AO1)
2223 LFD a4, 2 * SIZE(AO2)
2224 LFD a5, 3 * SIZE(AO1)
2225 LFD a6, 3 * SIZE(AO2)
2226 LFD a7, 4 * SIZE(AO1)
2227 LFD a8, 4 * SIZE(AO2)
2229 LFD b1, 1 * SIZE(BO)
2230 LFD b2, 2 * SIZE(BO)
2231 LFD b3, 3 * SIZE(BO)
2232 LFD b4, 4 * SIZE(BO)
2233 LFD b5, 5 * SIZE(BO)
2234 LFD b6, 6 * SIZE(BO)
2235 LFD b7, 7 * SIZE(BO)
2236 LFD b8, 8 * SIZE(BO)
/* Two-column block: unrolled body covering 16 elements per pass. */
/* Accumulators: y01,y03,y09,y11 collect AO1 products; y02,y04,y10,y12 */
/* collect AO2 products. */
/* a1..a8 and b1..b8 are consumed before being reloaded, so they must be live on entry. */
/* Loads run ahead to offset 20 of AO1/AO2 and offset 24 of BO, so after the */
/* 16 * SIZE pointer advance the registers hold offsets 1..4 / 1..8 again. */
/* NOTE(review): the loop-closing branch is not visible in this excerpt. */
2241 FMADD y01, a1, b1, y01
2242 LFD a1, 5 * SIZE(AO1)
2243 FMADD y02, a2, b1, y02
2244 LFD a2, 5 * SIZE(AO2)
2245 FMADD y03, a3, b2, y03
2246 LFD a3, 6 * SIZE(AO1)
2247 FMADD y04, a4, b2, y04
2248 LFD a4, 6 * SIZE(AO2)
2250 FMADD y09, a5, b3, y09
2251 LFD a5, 7 * SIZE(AO1)
2252 FMADD y10, a6, b3, y10
2253 LFD a6, 7 * SIZE(AO2)
2254 FMADD y11, a7, b4, y11
2255 LFD a7, 8 * SIZE(AO1)
2256 FMADD y12, a8, b4, y12
2257 LFD a8, 8 * SIZE(AO2)
/* b1..b4 consumed: refill with BO offsets 9..12. */
2259 LFD b1, 9 * SIZE(BO)
2260 LFD b2, 10 * SIZE(BO)
2261 LFD b3, 11 * SIZE(BO)
2262 LFD b4, 12 * SIZE(BO)
2264 FMADD y01, a1, b5, y01
2265 LFD a1, 9 * SIZE(AO1)
2266 FMADD y02, a2, b5, y02
2267 LFD a2, 9 * SIZE(AO2)
2268 FMADD y03, a3, b6, y03
2269 LFD a3, 10 * SIZE(AO1)
2270 FMADD y04, a4, b6, y04
2271 LFD a4, 10 * SIZE(AO2)
2273 FMADD y09, a5, b7, y09
2274 LFD a5, 11 * SIZE(AO1)
2275 FMADD y10, a6, b7, y10
2276 LFD a6, 11 * SIZE(AO2)
2277 FMADD y11, a7, b8, y11
2278 LFD a7, 12 * SIZE(AO1)
2279 FMADD y12, a8, b8, y12
2280 LFD a8, 12 * SIZE(AO2)
/* b5..b8 consumed: refill with BO offsets 13..16. */
2282 LFD b5, 13 * SIZE(BO)
2283 LFD b6, 14 * SIZE(BO)
2284 LFD b7, 15 * SIZE(BO)
2285 LFD b8, 16 * SIZE(BO)
2287 FMADD y01, a1, b1, y01
2288 LFD a1, 13 * SIZE(AO1)
2289 FMADD y02, a2, b1, y02
2290 LFD a2, 13 * SIZE(AO2)
2291 FMADD y03, a3, b2, y03
2292 LFD a3, 14 * SIZE(AO1)
2293 FMADD y04, a4, b2, y04
2294 LFD a4, 14 * SIZE(AO2)
2296 FMADD y09, a5, b3, y09
2297 LFD a5, 15 * SIZE(AO1)
2298 FMADD y10, a6, b3, y10
2299 LFD a6, 15 * SIZE(AO2)
2300 FMADD y11, a7, b4, y11
2301 LFD a7, 16 * SIZE(AO1)
2302 FMADD y12, a8, b4, y12
2303 LFD a8, 16 * SIZE(AO2)
/* Look-ahead for the next pass: BO offsets 17..20. */
2305 LFD b1, 17 * SIZE(BO)
2306 LFD b2, 18 * SIZE(BO)
2307 LFD b3, 19 * SIZE(BO)
2308 LFD b4, 20 * SIZE(BO)
2310 FMADD y01, a1, b5, y01
2311 LFD a1, 17 * SIZE(AO1)
2312 FMADD y02, a2, b5, y02
2313 LFD a2, 17 * SIZE(AO2)
2314 FMADD y03, a3, b6, y03
2315 LFD a3, 18 * SIZE(AO1)
2316 FMADD y04, a4, b6, y04
2317 LFD a4, 18 * SIZE(AO2)
2319 FMADD y09, a5, b7, y09
2320 LFD a5, 19 * SIZE(AO1)
2321 FMADD y10, a6, b7, y10
2322 LFD a6, 19 * SIZE(AO2)
2323 FMADD y11, a7, b8, y11
2324 LFD a7, 20 * SIZE(AO1)
2325 FMADD y12, a8, b8, y12
2326 LFD a8, 20 * SIZE(AO2)
/* Look-ahead for the next pass: BO offsets 21..24. */
2328 LFD b5, 21 * SIZE(BO)
2329 LFD b6, 22 * SIZE(BO)
2330 LFD b7, 23 * SIZE(BO)
2331 LFD b8, 24 * SIZE(BO)
/* Advance past the 16 elements consumed in this pass. */
2333 addi AO1, AO1, 16 * SIZE
2334 addi AO2, AO2, 16 * SIZE
2338 addi BO, BO, 16 * SIZE
/* Two-column block: 16-element pass with no look-ahead. */
/* Same accumulation pattern as the unrolled body (y01,y03,y09,y11 for AO1; */
/* y02,y04,y10,y12 for AO2), but nothing is loaded beyond offset 16, and the */
/* last four elements are multiplied without further loads. */
/* a1..a8 and b1..b8 must be live on entry (they are used before being reloaded). */
/* NOTE(review): presumably the final pass of the unrolled loop; the selecting branch is not visible here. */
2343 FMADD y01, a1, b1, y01
2344 LFD a1, 5 * SIZE(AO1)
2345 FMADD y02, a2, b1, y02
2346 LFD a2, 5 * SIZE(AO2)
2347 FMADD y03, a3, b2, y03
2348 LFD a3, 6 * SIZE(AO1)
2349 FMADD y04, a4, b2, y04
2350 LFD a4, 6 * SIZE(AO2)
2352 FMADD y09, a5, b3, y09
2353 LFD a5, 7 * SIZE(AO1)
2354 FMADD y10, a6, b3, y10
2355 LFD a6, 7 * SIZE(AO2)
2356 FMADD y11, a7, b4, y11
2357 LFD a7, 8 * SIZE(AO1)
2358 FMADD y12, a8, b4, y12
2359 LFD a8, 8 * SIZE(AO2)
/* b1..b4 consumed: refill with BO offsets 9..12. */
2361 LFD b1, 9 * SIZE(BO)
2362 LFD b2, 10 * SIZE(BO)
2363 LFD b3, 11 * SIZE(BO)
2364 LFD b4, 12 * SIZE(BO)
2366 FMADD y01, a1, b5, y01
2367 LFD a1, 9 * SIZE(AO1)
2368 FMADD y02, a2, b5, y02
2369 LFD a2, 9 * SIZE(AO2)
2370 FMADD y03, a3, b6, y03
2371 LFD a3, 10 * SIZE(AO1)
2372 FMADD y04, a4, b6, y04
2373 LFD a4, 10 * SIZE(AO2)
2375 FMADD y09, a5, b7, y09
2376 LFD a5, 11 * SIZE(AO1)
2377 FMADD y10, a6, b7, y10
2378 LFD a6, 11 * SIZE(AO2)
2379 FMADD y11, a7, b8, y11
2380 LFD a7, 12 * SIZE(AO1)
2381 FMADD y12, a8, b8, y12
2382 LFD a8, 12 * SIZE(AO2)
/* b5..b8 consumed: refill with BO offsets 13..16 (the last b loads of this pass). */
2384 LFD b5, 13 * SIZE(BO)
2385 LFD b6, 14 * SIZE(BO)
2386 LFD b7, 15 * SIZE(BO)
2387 LFD b8, 16 * SIZE(BO)
2389 FMADD y01, a1, b1, y01
2390 LFD a1, 13 * SIZE(AO1)
2391 FMADD y02, a2, b1, y02
2392 LFD a2, 13 * SIZE(AO2)
2393 FMADD y03, a3, b2, y03
2394 LFD a3, 14 * SIZE(AO1)
2395 FMADD y04, a4, b2, y04
2396 LFD a4, 14 * SIZE(AO2)
2398 FMADD y09, a5, b3, y09
2399 LFD a5, 15 * SIZE(AO1)
2400 FMADD y10, a6, b3, y10
2401 LFD a6, 15 * SIZE(AO2)
2402 FMADD y11, a7, b4, y11
2403 LFD a7, 16 * SIZE(AO1)
2404 FMADD y12, a8, b4, y12
2405 LFD a8, 16 * SIZE(AO2)
/* Elements 13..16: multiply only. */
2407 FMADD y01, a1, b5, y01
2408 FMADD y02, a2, b5, y02
2409 FMADD y03, a3, b6, y03
2410 FMADD y04, a4, b6, y04
2412 FMADD y09, a5, b7, y09
2413 FMADD y10, a6, b7, y10
2414 FMADD y11, a7, b8, y11
2415 FMADD y12, a8, b8, y12
2417 addi AO1, AO1, 16 * SIZE
2418 addi AO2, AO2, 16 * SIZE
2419 addi BO, BO, 16 * SIZE
/* Two-column block: 8-element step. */
/* Loads offsets 1..8 of AO1, AO2 and BO. Only four accumulators are used */
/* here: y01/y09 collect AO1 products and y02/y10 collect AO2 products. */
/* AO1, AO2 and BO advance by 8 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2428 LFD a1, 1 * SIZE(AO1)
2429 LFD a2, 1 * SIZE(AO2)
2430 LFD a3, 2 * SIZE(AO1)
2431 LFD a4, 2 * SIZE(AO2)
2433 LFD b1, 1 * SIZE(BO)
2434 LFD b2, 2 * SIZE(BO)
2435 LFD b3, 3 * SIZE(BO)
2436 LFD b4, 4 * SIZE(BO)
2438 LFD a5, 3 * SIZE(AO1)
2439 LFD a6, 3 * SIZE(AO2)
2440 LFD a7, 4 * SIZE(AO1)
2441 LFD a8, 4 * SIZE(AO2)
2443 LFD b5, 5 * SIZE(BO)
2444 LFD b6, 6 * SIZE(BO)
2445 LFD b7, 7 * SIZE(BO)
2446 LFD b8, 8 * SIZE(BO)
/* Elements 1..4, refilling a1..a8 with elements 5..8 as they are consumed. */
2448 FMADD y01, a1, b1, y01
2449 LFD a1, 5 * SIZE(AO1)
2450 FMADD y02, a2, b1, y02
2451 LFD a2, 5 * SIZE(AO2)
2452 FMADD y09, a3, b2, y09
2453 LFD a3, 6 * SIZE(AO1)
2454 FMADD y10, a4, b2, y10
2455 LFD a4, 6 * SIZE(AO2)
2457 FMADD y01, a5, b3, y01
2458 LFD a5, 7 * SIZE(AO1)
2459 FMADD y02, a6, b3, y02
2460 LFD a6, 7 * SIZE(AO2)
2461 FMADD y09, a7, b4, y09
2462 LFD a7, 8 * SIZE(AO1)
2463 FMADD y10, a8, b4, y10
2464 LFD a8, 8 * SIZE(AO2)
/* Elements 5..8. */
2466 FMADD y01, a1, b5, y01
2467 FMADD y02, a2, b5, y02
2468 FMADD y09, a3, b6, y09
2469 FMADD y10, a4, b6, y10
2471 FMADD y01, a5, b7, y01
2472 addi AO1, AO1, 8 * SIZE
2473 FMADD y02, a6, b7, y02
2474 addi AO2, AO2, 8 * SIZE
2475 FMADD y09, a7, b8, y09
2476 addi BO, BO, 8 * SIZE
2477 FMADD y10, a8, b8, y10
/* Two-column block: 4-element step. */
/* Loads offsets 1..4 of AO1, AO2 and BO; y01/y09 collect AO1 products and */
/* y02/y10 collect AO2 products. AO1, AO2 and BO advance by 4 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2485 LFD a1, 1 * SIZE(AO1)
2486 LFD a2, 1 * SIZE(AO2)
2487 LFD a3, 2 * SIZE(AO1)
2488 LFD a4, 2 * SIZE(AO2)
2490 LFD a5, 3 * SIZE(AO1)
2491 LFD a6, 3 * SIZE(AO2)
2492 LFD a7, 4 * SIZE(AO1)
2493 LFD a8, 4 * SIZE(AO2)
2495 LFD b1, 1 * SIZE(BO)
2496 LFD b2, 2 * SIZE(BO)
2497 LFD b3, 3 * SIZE(BO)
2498 LFD b4, 4 * SIZE(BO)
2500 FMADD y01, a1, b1, y01
2501 FMADD y02, a2, b1, y02
2502 FMADD y09, a3, b2, y09
2503 FMADD y10, a4, b2, y10
2505 FMADD y01, a5, b3, y01
2506 addi AO1, AO1, 4 * SIZE
2507 FMADD y02, a6, b3, y02
2508 addi AO2, AO2, 4 * SIZE
2510 FMADD y09, a7, b4, y09
2511 addi BO, BO, 4 * SIZE
2512 FMADD y10, a8, b4, y10
/* Two-column block: 2-element step. */
/* Element 1 of AO1/AO2 times b1 goes into y01/y02; element 2 times b2 goes */
/* into y09/y10. AO1, AO2 and BO advance by 2 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2519 LFD a1, 1 * SIZE(AO1)
2520 LFD a2, 1 * SIZE(AO2)
2521 LFD b1, 1 * SIZE(BO)
2522 LFD b2, 2 * SIZE(BO)
2524 LFD a3, 2 * SIZE(AO1)
2525 LFD a4, 2 * SIZE(AO2)
2527 FMADD y01, a1, b1, y01
2528 FMADD y02, a2, b1, y02
2529 FMADD y09, a3, b2, y09
2530 FMADD y10, a4, b2, y10
2532 addi AO1, AO1, 2 * SIZE
2533 addi AO2, AO2, 2 * SIZE
2534 addi BO, BO, 2 * SIZE
/* Two-column block: single-element step. */
/* Multiplies element 1 of AO1 and of AO2 by element 1 of BO and adds the */
/* products to y01 and y02. No pointer is advanced in the visible lines. */
2541 LFD a1, 1 * SIZE(AO1)
2542 LFD b1, 1 * SIZE(BO)
2543 LFD a2, 1 * SIZE(AO2)
2545 FMADD y01, a1, b1, y01
2546 FMADD y02, a2, b1, y02
/* Two-column block: fold the accumulators into the two result elements at CO. */
/* The compare of INCY against SIZE sets cr0; the branch that consumes it is */
/* not visible here. NOTE(review): presumably it selects between the */
/* contiguous path (first group below) and a strided path (last group). */
/* NOTE(review): only y01/y02 are read here; the other accumulators used by */
/* the two-column code are presumably summed into them by hidden instructions - confirm. */
2552 cmpi cr0, 0, INCY, SIZE
2555 LFD a1, 1 * SIZE(CO)
2556 LFD a2, 2 * SIZE(CO)
/* result = alpha * accumulator + result */
2566 FMADD a1, alpha, y01, a1
2567 FMADD a2, alpha, y02, a2
2569 STFD a1, 1 * SIZE(CO)
2570 STFD a2, 2 * SIZE(CO)
2572 addi CO, CO, 2 * SIZE
/* Second form of the same update, reading f0/f1 instead of y01/y02. */
/* NOTE(review): f0/f1 are presumably the registers behind y01/y02; the loads and stores of this path are not visible here. */
2588 FMADD a1, alpha, f0, a1
2589 FMADD a2, alpha, f1, a2
/* One-column block: operand preload. */
/* a1..a8 take elements 1..8 of AO1; b1..b8 take elements 1..8 of BO. */
/* NOTE(review): presumably feeds the unrolled one-column loop that follows; the loop setup is not visible here. */
2618 LFD a1, 1 * SIZE(AO1)
2619 LFD a2, 2 * SIZE(AO1)
2620 LFD a3, 3 * SIZE(AO1)
2621 LFD a4, 4 * SIZE(AO1)
2622 LFD a5, 5 * SIZE(AO1)
2623 LFD a6, 6 * SIZE(AO1)
2624 LFD a7, 7 * SIZE(AO1)
2625 LFD a8, 8 * SIZE(AO1)
2627 LFD b1, 1 * SIZE(BO)
2628 LFD b2, 2 * SIZE(BO)
2629 LFD b3, 3 * SIZE(BO)
2630 LFD b4, 4 * SIZE(BO)
2631 LFD b5, 5 * SIZE(BO)
2632 LFD b6, 6 * SIZE(BO)
2633 LFD b7, 7 * SIZE(BO)
2634 LFD b8, 8 * SIZE(BO)
/* One-column block: unrolled body covering 16 elements per pass. */
/* The products are spread over four accumulators: y01 takes the a1/a5 */
/* pairs, y02 the a2/a6 pairs, y03 the a3/a7 pairs and y04 the a4/a8 pairs. */
/* Each FMADD is followed by reloading the same a/b register pair eight */
/* elements further on. Loads reach offset 24, so after the 16 * SIZE */
/* advance a1..a8 / b1..b8 again hold offsets 1..8. */
/* a1..a8 and b1..b8 must be live on entry (used before being reloaded). */
/* NOTE(review): the loop-closing branch is not visible in this excerpt. */
2639 FMADD y01, a1, b1, y01
2641 LFD a1, 9 * SIZE(AO1)
2642 LFD b1, 9 * SIZE(BO)
2644 FMADD y02, a2, b2, y02
2646 LFD a2, 10 * SIZE(AO1)
2647 LFD b2, 10 * SIZE(BO)
2649 FMADD y03, a3, b3, y03
2651 LFD a3, 11 * SIZE(AO1)
2652 LFD b3, 11 * SIZE(BO)
2654 FMADD y04, a4, b4, y04
2656 LFD a4, 12 * SIZE(AO1)
2657 LFD b4, 12 * SIZE(BO)
2659 FMADD y01, a5, b5, y01
2661 LFD a5, 13 * SIZE(AO1)
2662 LFD b5, 13 * SIZE(BO)
2664 FMADD y02, a6, b6, y02
2666 LFD a6, 14 * SIZE(AO1)
2667 LFD b6, 14 * SIZE(BO)
2669 FMADD y03, a7, b7, y03
2671 LFD a7, 15 * SIZE(AO1)
2672 LFD b7, 15 * SIZE(BO)
2674 FMADD y04, a8, b8, y04
2676 LFD a8, 16 * SIZE(AO1)
2677 LFD b8, 16 * SIZE(BO)
/* Second half: elements 9..16, while loading offsets 17..24 for the next pass. */
2679 FMADD y01, a1, b1, y01
2681 LFD a1, 17 * SIZE(AO1)
2682 LFD b1, 17 * SIZE(BO)
2684 FMADD y02, a2, b2, y02
2686 LFD a2, 18 * SIZE(AO1)
2687 LFD b2, 18 * SIZE(BO)
2689 FMADD y03, a3, b3, y03
2691 LFD a3, 19 * SIZE(AO1)
2692 LFD b3, 19 * SIZE(BO)
2694 FMADD y04, a4, b4, y04
2696 LFD a4, 20 * SIZE(AO1)
2697 LFD b4, 20 * SIZE(BO)
2699 FMADD y01, a5, b5, y01
2701 LFD a5, 21 * SIZE(AO1)
2702 LFD b5, 21 * SIZE(BO)
2704 FMADD y02, a6, b6, y02
2706 LFD a6, 22 * SIZE(AO1)
2707 LFD b6, 22 * SIZE(BO)
2709 FMADD y03, a7, b7, y03
2711 LFD a7, 23 * SIZE(AO1)
2712 LFD b7, 23 * SIZE(BO)
2714 FMADD y04, a8, b8, y04
2716 LFD a8, 24 * SIZE(AO1)
2717 LFD b8, 24 * SIZE(BO)
2719 addi AO1, AO1, 16 * SIZE
2720 addi BO, BO, 16 * SIZE
/* One-column block: 16-element pass with no look-ahead. */
/* Consumes the a1..a8 / b1..b8 values that are live on entry, reloads them */
/* with offsets 9..16, consumes those too, and advances AO1 and BO by */
/* 16 * SIZE. Nothing beyond offset 16 is loaded. */
/* Accumulators: y01 (a1,a5), y02 (a2,a6), y03 (a3,a7), y04 (a4,a8). */
/* NOTE(review): presumably the final pass of the unrolled loop; the selecting branch is not visible here. */
2726 FMADD y01, a1, b1, y01
2728 LFD a1, 9 * SIZE(AO1)
2729 LFD b1, 9 * SIZE(BO)
2731 FMADD y02, a2, b2, y02
2733 LFD a2, 10 * SIZE(AO1)
2734 LFD b2, 10 * SIZE(BO)
2736 FMADD y03, a3, b3, y03
2738 LFD a3, 11 * SIZE(AO1)
2739 LFD b3, 11 * SIZE(BO)
2741 FMADD y04, a4, b4, y04
2743 LFD a4, 12 * SIZE(AO1)
2744 LFD b4, 12 * SIZE(BO)
2746 FMADD y01, a5, b5, y01
2748 LFD a5, 13 * SIZE(AO1)
2749 LFD b5, 13 * SIZE(BO)
2751 FMADD y02, a6, b6, y02
2753 LFD a6, 14 * SIZE(AO1)
2754 LFD b6, 14 * SIZE(BO)
2756 FMADD y03, a7, b7, y03
2758 LFD a7, 15 * SIZE(AO1)
2759 LFD b7, 15 * SIZE(BO)
2761 FMADD y04, a8, b8, y04
2763 LFD a8, 16 * SIZE(AO1)
2764 LFD b8, 16 * SIZE(BO)
/* Elements 9..16: multiply only; pointer updates are interleaved. */
2766 FMADD y01, a1, b1, y01
2767 FMADD y02, a2, b2, y02
2768 FMADD y03, a3, b3, y03
2769 FMADD y04, a4, b4, y04
2771 FMADD y01, a5, b5, y01
2772 addi AO1, AO1, 16 * SIZE
2773 FMADD y02, a6, b6, y02
2774 addi BO, BO, 16 * SIZE
2776 FMADD y03, a7, b7, y03
2778 FMADD y04, a8, b8, y04
/* One-column block: 8-element step. */
/* Loads offsets 1..8 of AO1 and BO, adds the eight products to y01..y04 */
/* (two products per accumulator), and advances AO1 and BO by 8 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2788 LFD a1, 1 * SIZE(AO1)
2789 LFD a2, 2 * SIZE(AO1)
2790 LFD a3, 3 * SIZE(AO1)
2791 LFD a4, 4 * SIZE(AO1)
2793 LFD b1, 1 * SIZE(BO)
2794 LFD b2, 2 * SIZE(BO)
2795 LFD b3, 3 * SIZE(BO)
2796 LFD b4, 4 * SIZE(BO)
2798 LFD a5, 5 * SIZE(AO1)
2799 LFD a6, 6 * SIZE(AO1)
2800 LFD a7, 7 * SIZE(AO1)
2801 LFD a8, 8 * SIZE(AO1)
2803 LFD b5, 5 * SIZE(BO)
2804 LFD b6, 6 * SIZE(BO)
2805 LFD b7, 7 * SIZE(BO)
2806 LFD b8, 8 * SIZE(BO)
2808 FMADD y01, a1, b1, y01
2809 FMADD y02, a2, b2, y02
2810 FMADD y03, a3, b3, y03
2811 FMADD y04, a4, b4, y04
2813 FMADD y01, a5, b5, y01
2814 addi AO1, AO1, 8 * SIZE
2815 FMADD y02, a6, b6, y02
2816 addi BO, BO, 8 * SIZE
2817 FMADD y03, a7, b7, y03
2819 FMADD y04, a8, b8, y04
/* One-column block: 4-element step. */
/* Loads offsets 1..4 of AO1 and BO and adds one product to each of */
/* y01..y04, then advances AO1 and BO by 4 * SIZE. */
/* Only AO1 and BO are stepped: this path reads a single column, so no */
/* other column pointer is touched (matching the 8- and 2-element */
/* one-column steps, which also advance just AO1 and BO). */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2827 LFD a1, 1 * SIZE(AO1)
2828 LFD b1, 1 * SIZE(BO)
2829 LFD a2, 2 * SIZE(AO1)
2830 LFD b2, 2 * SIZE(BO)
2832 LFD a3, 3 * SIZE(AO1)
2833 LFD b3, 3 * SIZE(BO)
2834 LFD a4, 4 * SIZE(AO1)
2835 LFD b4, 4 * SIZE(BO)
2837 FMADD y01, a1, b1, y01
2838 addi AO1, AO1, 4 * SIZE
2839 FMADD y02, a2, b2, y02
2842 FMADD y03, a3, b3, y03
2843 addi BO, BO, 4 * SIZE
2844 FMADD y04, a4, b4, y04
/* One-column block: 2-element step. */
/* Adds AO1[1]*BO[1] to y01 and AO1[2]*BO[2] to y02 (offsets in SIZE units), */
/* then advances AO1 and BO by 2 * SIZE. */
/* NOTE(review): presumably a conditional remainder step; the guarding branch is not visible here. */
2852 LFD a1, 1 * SIZE(AO1)
2853 LFD b1, 1 * SIZE(BO)
2854 LFD a2, 2 * SIZE(AO1)
2855 LFD b2, 2 * SIZE(BO)
2857 FMADD y01, a1, b1, y01
2858 addi AO1, AO1, 2 * SIZE
2859 FMADD y02, a2, b2, y02
2860 addi BO, BO, 2 * SIZE
/* One-column block: single-element step. */
/* Adds the product of element 1 of AO1 and element 1 of BO to y01. */
/* No pointer is advanced in the visible lines. */
2867 LFD a1, 1 * SIZE(AO1)
2868 LFD b1, 1 * SIZE(BO)
2869 FMADD y01, a1, b1, y01
/* One-column block: fold the accumulator into the result element at CO. */
/* The compare of INCY against SIZE sets cr0; the branch that consumes it is */
/* not visible here. NOTE(review): presumably it selects between the */
/* contiguous form (LFD/FMADD/STFD below) and a strided form (last FMADD). */
/* NOTE(review): only y01 is read here; y02..y04 used by the one-column code */
/* are presumably summed into it by instructions outside this excerpt - confirm. */
2875 cmpi cr0, 0, INCY, SIZE
2878 LFD a1, 1 * SIZE(CO)
/* result = alpha * accumulator + result */
2884 FMADD a1, alpha, y01, a1
2885 STFD a1, 1 * SIZE(CO)
/* Second form of the same update, reading f0 instead of y01 (presumably the same register; its load/store are not visible here). */
2894 FMADD a1, alpha, f0, a1
2963 addi SP, SP, STACKSIZE