1 /*********************************************************************/
2 /* Copyright 2009, 2010 The University of Texas at Austin. */
3 /* All rights reserved. */
5 /* Redistribution and use in source and binary forms, with or */
6 /* without modification, are permitted provided that the following */
7 /* conditions are met: */
9 /* 1. Redistributions of source code must retain the above */
10 /* copyright notice, this list of conditions and the following */
11 /* disclaimer. */
13 /* 2. Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
18 /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19 /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22 /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23 /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24 /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25 /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26 /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27 /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28 /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30 /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31 /* POSSIBILITY OF SUCH DAMAGE. */
33 /* The views and conclusions contained in the software and */
34 /* documentation are those of the authors and should not be */
35 /* interpreted as representing official policies, either expressed */
36 /* or implied, of The University of Texas at Austin. */
37 /*********************************************************************/
/* Build-time configuration. */
/* NOTE(review): the two conditionals below are opened here, but what */
/* they guard and where they close is not visible -- confirm. */
64 #if defined(_AIX) || defined(__APPLE__)
65 #if !defined(__64BIT__) && defined(DOUBLE)
/* PREFETCHSIZE_A / PREFETCHSIZE_C are multiplied by SIZE and loaded */
/* into PREA / PREC during setup. Several alternative value pairs */
/* follow; only the PPC440 / PPC440FP2 guard is visible, so the other */
/* pairs are presumably selected by similar per-target conditionals */
/* -- TODO confirm which guard owns which pair. */
143 #define PREFETCHSIZE_A 24
144 #define PREFETCHSIZE_C 16
147 #if defined(PPC440) || defined(PPC440FP2)
148 #define PREFETCHSIZE_A 24
149 #define PREFETCHSIZE_C 16
153 #define PREFETCHSIZE_A 16
154 #define PREFETCHSIZE_C 16
158 #define PREFETCHSIZE_A 16
159 #define PREFETCHSIZE_C 16
163 #define PREFETCHSIZE_A 16
164 #define PREFETCHSIZE_C 16
168 #define PREFETCHSIZE_A 40
169 #define PREFETCHSIZE_C 24
173 #define PREFETCHSIZE_A 96
174 #define PREFETCHSIZE_C 40
/* Two alternative frame layouts. STACKSIZE is the amount subtracted */
/* from SP on entry; ALPHA and FZERO name two SP-relative slots near */
/* the top of that frame. */
/* NOTE(review): the selecting conditional is not visible; presumably */
/* 32-bit vs. 64-bit builds -- confirm. */
180 #define STACKSIZE 224
181 #define ALPHA 200(SP)
182 #define FZERO 208(SP)
184 #define STACKSIZE 280
185 #define ALPHA 256(SP)
186 #define FZERO 264(SP)
/* Allocate the local frame by lowering SP by STACKSIZE. */
192 addi SP, SP, -STACKSIZE
/* Fetch the arguments that are read from the stack. Offsets add */
/* STACKSIZE because SP has already been lowered, so FRAMESLOT(n) is */
/* taken relative to the SP value on entry. */
/* lwz loads 32-bit words, ld loads doublewords. The groups differ in */
/* how many arguments they fetch (2, 3 or 4). */
/* NOTE(review): which group is assembled is presumably decided by */
/* ABI / word-size conditionals that are not visible -- confirm. */
251 lwz INCY, FRAMESLOT(0) + STACKSIZE(SP)
252 lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP)
254 ld Y, FRAMESLOT(0) + STACKSIZE(SP)
255 ld INCY, FRAMESLOT(1) + STACKSIZE(SP)
256 ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
260 #if defined(_AIX) || defined(__APPLE__)
263 lwz INCX, FRAMESLOT(0) + STACKSIZE(SP)
264 lwz Y, FRAMESLOT(1) + STACKSIZE(SP)
265 lwz INCY, FRAMESLOT(2) + STACKSIZE(SP)
266 lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP)
268 lwz Y, FRAMESLOT(0) + STACKSIZE(SP)
269 lwz INCY, FRAMESLOT(1) + STACKSIZE(SP)
270 lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
273 ld Y, FRAMESLOT(0) + STACKSIZE(SP)
274 ld INCY, FRAMESLOT(1) + STACKSIZE(SP)
275 ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP)
/* Scale LDA, INCX and INCY by a left shift of BASE_SHIFT. */
/* LDA8 is derived first, while LDA is still unshifted; the extra */
/* shift of 3 makes it eight times the shifted LDA. */
/* NOTE(review): BASE_SHIFT is presumably log2 of the element size, */
/* turning element strides into byte strides -- confirm. */
282 slwi LDA8, LDA, BASE_SHIFT + 3
283 slwi LDA, LDA, BASE_SHIFT
284 slwi INCX, INCX, BASE_SHIFT
285 slwi INCY, INCY, BASE_SHIFT
/* PREA / PREC = PREFETCHSIZE_A / PREFETCHSIZE_C scaled by SIZE. */
287 li PREA, PREFETCHSIZE_A * SIZE
288 li PREC, PREFETCHSIZE_C * SIZE
/* Compare the shifted INCY with SIZE; the result lands in cr0. */
/* NOTE(review): the branch consuming cr0 is not visible; presumably */
/* it distinguishes a unit-stride y -- confirm. */
299 cmpi cr0, 0, INCY, SIZE
/* Store f0 into eight consecutive SIZE-wide slots starting at Y1, */
/* then advance Y1 past them. */
/* NOTE(review): presumably f0 holds 0.0 (cf. the FZERO slot) and */
/* this is the body of a clearing loop; neither the load of f0 nor */
/* the loop branch is visible -- confirm. */
311 STFD f0, 0 * SIZE(Y1)
312 STFD f0, 1 * SIZE(Y1)
313 STFD f0, 2 * SIZE(Y1)
314 STFD f0, 3 * SIZE(Y1)
315 STFD f0, 4 * SIZE(Y1)
316 STFD f0, 5 * SIZE(Y1)
317 STFD f0, 6 * SIZE(Y1)
318 STFD f0, 7 * SIZE(Y1)
319 addi Y1, Y1, 8 * SIZE
/* Load eight values through X into alpha1..alpha8. */
/* Every load uses offset 0. */
/* NOTE(review): X is presumably stepped between the loads by */
/* instructions that are not visible here -- confirm. */
329 LFD alpha1, 0 * SIZE(X)
331 LFD alpha2, 0 * SIZE(X)
333 LFD alpha3, 0 * SIZE(X)
335 LFD alpha4, 0 * SIZE(X)
337 LFD alpha5, 0 * SIZE(X)
339 LFD alpha6, 0 * SIZE(X)
341 LFD alpha7, 0 * SIZE(X)
343 LFD alpha8, 0 * SIZE(X)
/* Pre-multiply each loaded value by alpha, so the FMADD chains that */
/* use alphaK need only one multiply-add per matrix element. */
346 FMUL alpha1, alpha, alpha1
347 FMUL alpha2, alpha, alpha2
348 FMUL alpha3, alpha, alpha3
349 FMUL alpha4, alpha, alpha4
350 FMUL alpha5, alpha, alpha5
351 FMUL alpha6, alpha, alpha6
352 FMUL alpha7, alpha, alpha7
353 FMUL alpha8, alpha, alpha8
/* First 16-row tile across the eight columns AO1..AO8. */
/* Indices below are in units of SIZE. */
/* y01..y16 are loaded from Y1[0..15]; for each column K the values */
/* AOK[0..15] are multiplied by alphaK and accumulated into y01..y16. */
/* a1..a8 are used as a rotating set: each is reloaded with a later */
/* value once its FMADD group has been issued, so loads overlap with */
/* arithmetic. Each AOK is advanced by 16 as soon as its last values */
/* have been loaded. */
/* NOTE(review): this looks like the fill stage of a software */
/* pipeline; the loop label and branches are not visible -- confirm. */
371 LFD y01, 0 * SIZE(Y1)
372 LFD y02, 1 * SIZE(Y1)
373 LFD y03, 2 * SIZE(Y1)
374 LFD y04, 3 * SIZE(Y1)
375 LFD y05, 4 * SIZE(Y1)
376 LFD y06, 5 * SIZE(Y1)
377 LFD y07, 6 * SIZE(Y1)
378 LFD y08, 7 * SIZE(Y1)
380 LFD a1, 0 * SIZE(AO1)
381 LFD a2, 1 * SIZE(AO1)
382 LFD a3, 2 * SIZE(AO1)
383 LFD a4, 3 * SIZE(AO1)
384 LFD a5, 4 * SIZE(AO1)
385 LFD a6, 5 * SIZE(AO1)
386 LFD a7, 6 * SIZE(AO1)
387 LFD a8, 7 * SIZE(AO1)
389 LFD y09, 8 * SIZE(Y1)
390 LFD y10, 9 * SIZE(Y1)
391 LFD y11, 10 * SIZE(Y1)
392 LFD y12, 11 * SIZE(Y1)
393 LFD y13, 12 * SIZE(Y1)
394 LFD y14, 13 * SIZE(Y1)
395 LFD y15, 14 * SIZE(Y1)
396 LFD y16, 15 * SIZE(Y1)
/* column 1: y01..y16 += alpha1 * AO1[0..15] */
398 FMADD y01, alpha1, a1, y01
399 FMADD y02, alpha1, a2, y02
400 FMADD y03, alpha1, a3, y03
401 FMADD y04, alpha1, a4, y04
403 LFD a1, 8 * SIZE(AO1)
404 LFD a2, 9 * SIZE(AO1)
405 LFD a3, 10 * SIZE(AO1)
406 LFD a4, 11 * SIZE(AO1)
408 FMADD y05, alpha1, a5, y05
409 FMADD y06, alpha1, a6, y06
410 FMADD y07, alpha1, a7, y07
411 FMADD y08, alpha1, a8, y08
413 LFD a5, 12 * SIZE(AO1)
414 LFD a6, 13 * SIZE(AO1)
415 LFD a7, 14 * SIZE(AO1)
416 LFD a8, 15 * SIZE(AO1)
418 addi AO1, AO1, 16 * SIZE
423 FMADD y09, alpha1, a1, y09
424 FMADD y10, alpha1, a2, y10
425 FMADD y11, alpha1, a3, y11
426 FMADD y12, alpha1, a4, y12
428 LFD a1, 0 * SIZE(AO2)
429 LFD a2, 1 * SIZE(AO2)
430 LFD a3, 2 * SIZE(AO2)
431 LFD a4, 3 * SIZE(AO2)
433 FMADD y13, alpha1, a5, y13
434 FMADD y14, alpha1, a6, y14
435 FMADD y15, alpha1, a7, y15
436 FMADD y16, alpha1, a8, y16
438 LFD a5, 4 * SIZE(AO2)
439 LFD a6, 5 * SIZE(AO2)
440 LFD a7, 6 * SIZE(AO2)
441 LFD a8, 7 * SIZE(AO2)
/* column 2: y01..y16 += alpha2 * AO2[0..15] */
443 FMADD y01, alpha2, a1, y01
444 FMADD y02, alpha2, a2, y02
445 FMADD y03, alpha2, a3, y03
446 FMADD y04, alpha2, a4, y04
448 LFD a1, 8 * SIZE(AO2)
449 LFD a2, 9 * SIZE(AO2)
450 LFD a3, 10 * SIZE(AO2)
451 LFD a4, 11 * SIZE(AO2)
453 FMADD y05, alpha2, a5, y05
454 FMADD y06, alpha2, a6, y06
455 FMADD y07, alpha2, a7, y07
456 FMADD y08, alpha2, a8, y08
458 LFD a5, 12 * SIZE(AO2)
459 LFD a6, 13 * SIZE(AO2)
460 LFD a7, 14 * SIZE(AO2)
461 LFD a8, 15 * SIZE(AO2)
463 addi AO2, AO2, 16 * SIZE
468 FMADD y09, alpha2, a1, y09
469 FMADD y10, alpha2, a2, y10
470 FMADD y11, alpha2, a3, y11
471 FMADD y12, alpha2, a4, y12
473 LFD a1, 0 * SIZE(AO3)
474 LFD a2, 1 * SIZE(AO3)
475 LFD a3, 2 * SIZE(AO3)
476 LFD a4, 3 * SIZE(AO3)
478 FMADD y13, alpha2, a5, y13
479 FMADD y14, alpha2, a6, y14
480 FMADD y15, alpha2, a7, y15
481 FMADD y16, alpha2, a8, y16
483 LFD a5, 4 * SIZE(AO3)
484 LFD a6, 5 * SIZE(AO3)
485 LFD a7, 6 * SIZE(AO3)
486 LFD a8, 7 * SIZE(AO3)
/* column 3: y01..y16 += alpha3 * AO3[0..15] */
488 FMADD y01, alpha3, a1, y01
489 FMADD y02, alpha3, a2, y02
490 FMADD y03, alpha3, a3, y03
491 FMADD y04, alpha3, a4, y04
493 LFD a1, 8 * SIZE(AO3)
494 LFD a2, 9 * SIZE(AO3)
495 LFD a3, 10 * SIZE(AO3)
496 LFD a4, 11 * SIZE(AO3)
498 FMADD y05, alpha3, a5, y05
499 FMADD y06, alpha3, a6, y06
500 FMADD y07, alpha3, a7, y07
501 FMADD y08, alpha3, a8, y08
503 LFD a5, 12 * SIZE(AO3)
504 LFD a6, 13 * SIZE(AO3)
505 LFD a7, 14 * SIZE(AO3)
506 LFD a8, 15 * SIZE(AO3)
508 addi AO3, AO3, 16 * SIZE
513 FMADD y09, alpha3, a1, y09
514 FMADD y10, alpha3, a2, y10
515 FMADD y11, alpha3, a3, y11
516 FMADD y12, alpha3, a4, y12
518 LFD a1, 0 * SIZE(AO4)
519 LFD a2, 1 * SIZE(AO4)
520 LFD a3, 2 * SIZE(AO4)
521 LFD a4, 3 * SIZE(AO4)
523 FMADD y13, alpha3, a5, y13
524 FMADD y14, alpha3, a6, y14
525 FMADD y15, alpha3, a7, y15
526 FMADD y16, alpha3, a8, y16
528 LFD a5, 4 * SIZE(AO4)
529 LFD a6, 5 * SIZE(AO4)
530 LFD a7, 6 * SIZE(AO4)
531 LFD a8, 7 * SIZE(AO4)
/* column 4: y01..y16 += alpha4 * AO4[0..15] */
533 FMADD y01, alpha4, a1, y01
534 FMADD y02, alpha4, a2, y02
535 FMADD y03, alpha4, a3, y03
536 FMADD y04, alpha4, a4, y04
538 LFD a1, 8 * SIZE(AO4)
539 LFD a2, 9 * SIZE(AO4)
540 LFD a3, 10 * SIZE(AO4)
541 LFD a4, 11 * SIZE(AO4)
543 FMADD y05, alpha4, a5, y05
544 FMADD y06, alpha4, a6, y06
545 FMADD y07, alpha4, a7, y07
546 FMADD y08, alpha4, a8, y08
548 LFD a5, 12 * SIZE(AO4)
549 LFD a6, 13 * SIZE(AO4)
550 LFD a7, 14 * SIZE(AO4)
551 LFD a8, 15 * SIZE(AO4)
553 addi AO4, AO4, 16 * SIZE
558 FMADD y09, alpha4, a1, y09
559 FMADD y10, alpha4, a2, y10
560 FMADD y11, alpha4, a3, y11
561 FMADD y12, alpha4, a4, y12
563 LFD a1, 0 * SIZE(AO5)
564 LFD a2, 1 * SIZE(AO5)
565 LFD a3, 2 * SIZE(AO5)
566 LFD a4, 3 * SIZE(AO5)
568 FMADD y13, alpha4, a5, y13
569 FMADD y14, alpha4, a6, y14
570 FMADD y15, alpha4, a7, y15
571 FMADD y16, alpha4, a8, y16
573 LFD a5, 4 * SIZE(AO5)
574 LFD a6, 5 * SIZE(AO5)
575 LFD a7, 6 * SIZE(AO5)
576 LFD a8, 7 * SIZE(AO5)
/* column 5: y01..y16 += alpha5 * AO5[0..15] */
578 FMADD y01, alpha5, a1, y01
579 FMADD y02, alpha5, a2, y02
580 FMADD y03, alpha5, a3, y03
581 FMADD y04, alpha5, a4, y04
583 LFD a1, 8 * SIZE(AO5)
584 LFD a2, 9 * SIZE(AO5)
585 LFD a3, 10 * SIZE(AO5)
586 LFD a4, 11 * SIZE(AO5)
588 FMADD y05, alpha5, a5, y05
589 FMADD y06, alpha5, a6, y06
590 FMADD y07, alpha5, a7, y07
591 FMADD y08, alpha5, a8, y08
593 LFD a5, 12 * SIZE(AO5)
594 LFD a6, 13 * SIZE(AO5)
595 LFD a7, 14 * SIZE(AO5)
596 LFD a8, 15 * SIZE(AO5)
598 addi AO5, AO5, 16 * SIZE
603 FMADD y09, alpha5, a1, y09
604 FMADD y10, alpha5, a2, y10
605 FMADD y11, alpha5, a3, y11
606 FMADD y12, alpha5, a4, y12
608 LFD a1, 0 * SIZE(AO6)
609 LFD a2, 1 * SIZE(AO6)
610 LFD a3, 2 * SIZE(AO6)
611 LFD a4, 3 * SIZE(AO6)
613 FMADD y13, alpha5, a5, y13
614 FMADD y14, alpha5, a6, y14
615 FMADD y15, alpha5, a7, y15
616 FMADD y16, alpha5, a8, y16
618 LFD a5, 4 * SIZE(AO6)
619 LFD a6, 5 * SIZE(AO6)
620 LFD a7, 6 * SIZE(AO6)
621 LFD a8, 7 * SIZE(AO6)
/* column 6: y01..y16 += alpha6 * AO6[0..15] */
623 FMADD y01, alpha6, a1, y01
624 FMADD y02, alpha6, a2, y02
625 FMADD y03, alpha6, a3, y03
626 FMADD y04, alpha6, a4, y04
628 LFD a1, 8 * SIZE(AO6)
629 LFD a2, 9 * SIZE(AO6)
630 LFD a3, 10 * SIZE(AO6)
631 LFD a4, 11 * SIZE(AO6)
633 FMADD y05, alpha6, a5, y05
634 FMADD y06, alpha6, a6, y06
635 FMADD y07, alpha6, a7, y07
636 FMADD y08, alpha6, a8, y08
638 LFD a5, 12 * SIZE(AO6)
639 LFD a6, 13 * SIZE(AO6)
640 LFD a7, 14 * SIZE(AO6)
641 LFD a8, 15 * SIZE(AO6)
643 addi AO6, AO6, 16 * SIZE
648 FMADD y09, alpha6, a1, y09
649 FMADD y10, alpha6, a2, y10
650 FMADD y11, alpha6, a3, y11
651 FMADD y12, alpha6, a4, y12
653 LFD a1, 0 * SIZE(AO7)
654 LFD a2, 1 * SIZE(AO7)
655 LFD a3, 2 * SIZE(AO7)
656 LFD a4, 3 * SIZE(AO7)
658 FMADD y13, alpha6, a5, y13
659 FMADD y14, alpha6, a6, y14
660 FMADD y15, alpha6, a7, y15
661 FMADD y16, alpha6, a8, y16
663 LFD a5, 4 * SIZE(AO7)
664 LFD a6, 5 * SIZE(AO7)
665 LFD a7, 6 * SIZE(AO7)
666 LFD a8, 7 * SIZE(AO7)
/* column 7: y01..y16 += alpha7 * AO7[0..15] */
668 FMADD y01, alpha7, a1, y01
669 FMADD y02, alpha7, a2, y02
670 FMADD y03, alpha7, a3, y03
671 FMADD y04, alpha7, a4, y04
673 LFD a1, 8 * SIZE(AO7)
674 LFD a2, 9 * SIZE(AO7)
675 LFD a3, 10 * SIZE(AO7)
676 LFD a4, 11 * SIZE(AO7)
678 FMADD y05, alpha7, a5, y05
679 FMADD y06, alpha7, a6, y06
680 FMADD y07, alpha7, a7, y07
681 FMADD y08, alpha7, a8, y08
683 LFD a5, 12 * SIZE(AO7)
684 LFD a6, 13 * SIZE(AO7)
685 LFD a7, 14 * SIZE(AO7)
686 LFD a8, 15 * SIZE(AO7)
688 addi AO7, AO7, 16 * SIZE
693 FMADD y09, alpha7, a1, y09
694 FMADD y10, alpha7, a2, y10
695 FMADD y11, alpha7, a3, y11
696 FMADD y12, alpha7, a4, y12
698 LFD a1, 0 * SIZE(AO8)
699 LFD a2, 1 * SIZE(AO8)
700 LFD a3, 2 * SIZE(AO8)
701 LFD a4, 3 * SIZE(AO8)
703 FMADD y13, alpha7, a5, y13
704 FMADD y14, alpha7, a6, y14
705 FMADD y15, alpha7, a7, y15
706 FMADD y16, alpha7, a8, y16
708 LFD a5, 4 * SIZE(AO8)
709 LFD a6, 5 * SIZE(AO8)
710 LFD a7, 6 * SIZE(AO8)
711 LFD a8, 7 * SIZE(AO8)
/* column 8: y01..y16 += alpha8 * AO8[0..15] */
713 FMADD y01, alpha8, a1, y01
714 FMADD y02, alpha8, a2, y02
715 FMADD y03, alpha8, a3, y03
716 FMADD y04, alpha8, a4, y04
718 LFD a1, 8 * SIZE(AO8)
719 LFD a2, 9 * SIZE(AO8)
720 LFD a3, 10 * SIZE(AO8)
721 LFD a4, 11 * SIZE(AO8)
723 FMADD y05, alpha8, a5, y05
724 FMADD y06, alpha8, a6, y06
725 FMADD y07, alpha8, a7, y07
726 FMADD y08, alpha8, a8, y08
728 LFD a5, 12 * SIZE(AO8)
729 LFD a6, 13 * SIZE(AO8)
730 LFD a7, 14 * SIZE(AO8)
731 LFD a8, 15 * SIZE(AO8)
733 addi AO8, AO8, 16 * SIZE
738 FMADD y09, alpha8, a1, y09
739 FMADD y10, alpha8, a2, y10
740 FMADD y11, alpha8, a3, y11
741 FMADD y12, alpha8, a4, y12
/* AO1 was already advanced by 16 above, so the loads from AO1 below */
/* fetch the first eight column-1 values of the following tile. */
743 LFD a1, 0 * SIZE(AO1)
744 LFD a2, 1 * SIZE(AO1)
745 LFD a3, 2 * SIZE(AO1)
746 LFD a4, 3 * SIZE(AO1)
748 FMADD y13, alpha8, a5, y13
749 FMADD y14, alpha8, a6, y14
750 FMADD y15, alpha8, a7, y15
751 FMADD y16, alpha8, a8, y16
753 LFD a5, 4 * SIZE(AO1)
754 LFD a6, 5 * SIZE(AO1)
755 LFD a7, 6 * SIZE(AO1)
756 LFD a8, 7 * SIZE(AO1)
/* Store the first four finished results to Y1[0..3] and reuse */
/* y01..y04 for Y1[16..19]. y05..y16 stay in registers; they are */
/* not stored in this block. */
758 STFD y01, 0 * SIZE(Y1)
759 STFD y02, 1 * SIZE(Y1)
760 STFD y03, 2 * SIZE(Y1)
761 STFD y04, 3 * SIZE(Y1)
763 LFD y01, 16 * SIZE(Y1)
764 LFD y02, 17 * SIZE(Y1)
765 LFD y03, 18 * SIZE(Y1)
766 LFD y04, 19 * SIZE(Y1)
/* 16-row by 8-column tile step, overlapped with the stores of the */
/* previous tile. Indices are in units of SIZE. */
/* The first FMADDs consume y01..y04 and a1..a8 as they stand on */
/* entry; the code immediately preceding this block leaves */
/* Y1[16..19] in y01..y04 and AO1[0..7] in a1..a8. */
/* y05..y16 still hold earlier results: each group of four is stored */
/* to Y1[4..15] and then reloaded from Y1[20..31] just before its */
/* first FMADD in this step. */
/* AO1..AO4 are advanced together after column 4 has been loaded, */
/* AO5..AO8 together after column 8; Y1 is advanced by 16 at the end, */
/* which re-establishes the entry state described above. */
/* NOTE(review): presumably the steady-state body of a counted loop; */
/* the label and closing branch are not visible -- confirm. */
/* column 1: y01..y16 += alpha1 * AO1[0..15] */
773 FMADD y01, alpha1, a1, y01
774 FMADD y02, alpha1, a2, y02
775 FMADD y03, alpha1, a3, y03
776 FMADD y04, alpha1, a4, y04
778 LFD a1, 8 * SIZE(AO1)
779 LFD a2, 9 * SIZE(AO1)
780 LFD a3, 10 * SIZE(AO1)
781 LFD a4, 11 * SIZE(AO1)
/* flush pending y05..y08, then reload them for this tile */
783 STFD y05, 4 * SIZE(Y1)
784 STFD y06, 5 * SIZE(Y1)
785 STFD y07, 6 * SIZE(Y1)
786 STFD y08, 7 * SIZE(Y1)
788 LFD y05, 20 * SIZE(Y1)
789 LFD y06, 21 * SIZE(Y1)
790 LFD y07, 22 * SIZE(Y1)
791 LFD y08, 23 * SIZE(Y1)
793 FMADD y05, alpha1, a5, y05
794 FMADD y06, alpha1, a6, y06
795 FMADD y07, alpha1, a7, y07
796 FMADD y08, alpha1, a8, y08
798 LFD a5, 12 * SIZE(AO1)
799 LFD a6, 13 * SIZE(AO1)
800 LFD a7, 14 * SIZE(AO1)
801 LFD a8, 15 * SIZE(AO1)
/* flush pending y09..y12, then reload them for this tile */
803 STFD y09, 8 * SIZE(Y1)
804 STFD y10, 9 * SIZE(Y1)
805 STFD y11, 10 * SIZE(Y1)
806 STFD y12, 11 * SIZE(Y1)
808 LFD y09, 24 * SIZE(Y1)
809 LFD y10, 25 * SIZE(Y1)
810 LFD y11, 26 * SIZE(Y1)
811 LFD y12, 27 * SIZE(Y1)
813 FMADD y09, alpha1, a1, y09
814 FMADD y10, alpha1, a2, y10
815 FMADD y11, alpha1, a3, y11
816 FMADD y12, alpha1, a4, y12
818 LFD a1, 0 * SIZE(AO2)
819 LFD a2, 1 * SIZE(AO2)
820 LFD a3, 2 * SIZE(AO2)
821 LFD a4, 3 * SIZE(AO2)
/* flush pending y13..y16, then reload them for this tile */
823 STFD y13, 12 * SIZE(Y1)
824 STFD y14, 13 * SIZE(Y1)
825 STFD y15, 14 * SIZE(Y1)
826 STFD y16, 15 * SIZE(Y1)
828 LFD y13, 28 * SIZE(Y1)
829 LFD y14, 29 * SIZE(Y1)
830 LFD y15, 30 * SIZE(Y1)
831 LFD y16, 31 * SIZE(Y1)
833 FMADD y13, alpha1, a5, y13
834 FMADD y14, alpha1, a6, y14
835 FMADD y15, alpha1, a7, y15
836 FMADD y16, alpha1, a8, y16
838 LFD a5, 4 * SIZE(AO2)
839 LFD a6, 5 * SIZE(AO2)
840 LFD a7, 6 * SIZE(AO2)
841 LFD a8, 7 * SIZE(AO2)
/* column 2: y01..y16 += alpha2 * AO2[0..15] */
843 FMADD y01, alpha2, a1, y01
844 FMADD y02, alpha2, a2, y02
845 FMADD y03, alpha2, a3, y03
846 FMADD y04, alpha2, a4, y04
848 LFD a1, 8 * SIZE(AO2)
849 LFD a2, 9 * SIZE(AO2)
850 LFD a3, 10 * SIZE(AO2)
851 LFD a4, 11 * SIZE(AO2)
853 FMADD y05, alpha2, a5, y05
854 FMADD y06, alpha2, a6, y06
855 FMADD y07, alpha2, a7, y07
856 FMADD y08, alpha2, a8, y08
858 LFD a5, 12 * SIZE(AO2)
859 LFD a6, 13 * SIZE(AO2)
860 LFD a7, 14 * SIZE(AO2)
861 LFD a8, 15 * SIZE(AO2)
863 FMADD y09, alpha2, a1, y09
864 FMADD y10, alpha2, a2, y10
865 FMADD y11, alpha2, a3, y11
866 FMADD y12, alpha2, a4, y12
868 LFD a1, 0 * SIZE(AO3)
869 LFD a2, 1 * SIZE(AO3)
870 LFD a3, 2 * SIZE(AO3)
871 LFD a4, 3 * SIZE(AO3)
873 FMADD y13, alpha2, a5, y13
874 FMADD y14, alpha2, a6, y14
875 FMADD y15, alpha2, a7, y15
876 FMADD y16, alpha2, a8, y16
878 LFD a5, 4 * SIZE(AO3)
879 LFD a6, 5 * SIZE(AO3)
880 LFD a7, 6 * SIZE(AO3)
881 LFD a8, 7 * SIZE(AO3)
/* column 3: y01..y16 += alpha3 * AO3[0..15] */
883 FMADD y01, alpha3, a1, y01
884 FMADD y02, alpha3, a2, y02
885 FMADD y03, alpha3, a3, y03
886 FMADD y04, alpha3, a4, y04
888 LFD a1, 8 * SIZE(AO3)
889 LFD a2, 9 * SIZE(AO3)
890 LFD a3, 10 * SIZE(AO3)
891 LFD a4, 11 * SIZE(AO3)
893 FMADD y05, alpha3, a5, y05
894 FMADD y06, alpha3, a6, y06
895 FMADD y07, alpha3, a7, y07
896 FMADD y08, alpha3, a8, y08
898 LFD a5, 12 * SIZE(AO3)
899 LFD a6, 13 * SIZE(AO3)
900 LFD a7, 14 * SIZE(AO3)
901 LFD a8, 15 * SIZE(AO3)
903 FMADD y09, alpha3, a1, y09
904 FMADD y10, alpha3, a2, y10
905 FMADD y11, alpha3, a3, y11
906 FMADD y12, alpha3, a4, y12
908 LFD a1, 0 * SIZE(AO4)
909 LFD a2, 1 * SIZE(AO4)
910 LFD a3, 2 * SIZE(AO4)
911 LFD a4, 3 * SIZE(AO4)
913 FMADD y13, alpha3, a5, y13
914 FMADD y14, alpha3, a6, y14
915 FMADD y15, alpha3, a7, y15
916 FMADD y16, alpha3, a8, y16
918 LFD a5, 4 * SIZE(AO4)
919 LFD a6, 5 * SIZE(AO4)
920 LFD a7, 6 * SIZE(AO4)
921 LFD a8, 7 * SIZE(AO4)
/* column 4: y01..y16 += alpha4 * AO4[0..15] */
923 FMADD y01, alpha4, a1, y01
924 FMADD y02, alpha4, a2, y02
925 FMADD y03, alpha4, a3, y03
926 FMADD y04, alpha4, a4, y04
928 LFD a1, 8 * SIZE(AO4)
929 LFD a2, 9 * SIZE(AO4)
930 LFD a3, 10 * SIZE(AO4)
931 LFD a4, 11 * SIZE(AO4)
933 FMADD y05, alpha4, a5, y05
934 FMADD y06, alpha4, a6, y06
935 FMADD y07, alpha4, a7, y07
936 FMADD y08, alpha4, a8, y08
938 LFD a5, 12 * SIZE(AO4)
939 LFD a6, 13 * SIZE(AO4)
940 LFD a7, 14 * SIZE(AO4)
941 LFD a8, 15 * SIZE(AO4)
/* all 16 values of columns 1..4 are loaded: step AO1..AO4 */
943 addi AO1, AO1, 16 * SIZE
944 addi AO2, AO2, 16 * SIZE
945 addi AO3, AO3, 16 * SIZE
946 addi AO4, AO4, 16 * SIZE
953 FMADD y09, alpha4, a1, y09
954 FMADD y10, alpha4, a2, y10
955 FMADD y11, alpha4, a3, y11
956 FMADD y12, alpha4, a4, y12
958 LFD a1, 0 * SIZE(AO5)
959 LFD a2, 1 * SIZE(AO5)
960 LFD a3, 2 * SIZE(AO5)
961 LFD a4, 3 * SIZE(AO5)
963 FMADD y13, alpha4, a5, y13
964 FMADD y14, alpha4, a6, y14
965 FMADD y15, alpha4, a7, y15
966 FMADD y16, alpha4, a8, y16
968 LFD a5, 4 * SIZE(AO5)
969 LFD a6, 5 * SIZE(AO5)
970 LFD a7, 6 * SIZE(AO5)
971 LFD a8, 7 * SIZE(AO5)
/* column 5: y01..y16 += alpha5 * AO5[0..15] */
973 FMADD y01, alpha5, a1, y01
974 FMADD y02, alpha5, a2, y02
975 FMADD y03, alpha5, a3, y03
976 FMADD y04, alpha5, a4, y04
978 LFD a1, 8 * SIZE(AO5)
979 LFD a2, 9 * SIZE(AO5)
980 LFD a3, 10 * SIZE(AO5)
981 LFD a4, 11 * SIZE(AO5)
983 FMADD y05, alpha5, a5, y05
984 FMADD y06, alpha5, a6, y06
985 FMADD y07, alpha5, a7, y07
986 FMADD y08, alpha5, a8, y08
988 LFD a5, 12 * SIZE(AO5)
989 LFD a6, 13 * SIZE(AO5)
990 LFD a7, 14 * SIZE(AO5)
991 LFD a8, 15 * SIZE(AO5)
993 FMADD y09, alpha5, a1, y09
994 FMADD y10, alpha5, a2, y10
995 FMADD y11, alpha5, a3, y11
996 FMADD y12, alpha5, a4, y12
998 LFD a1, 0 * SIZE(AO6)
999 LFD a2, 1 * SIZE(AO6)
1000 LFD a3, 2 * SIZE(AO6)
1001 LFD a4, 3 * SIZE(AO6)
1003 FMADD y13, alpha5, a5, y13
1004 FMADD y14, alpha5, a6, y14
1005 FMADD y15, alpha5, a7, y15
1006 FMADD y16, alpha5, a8, y16
1008 LFD a5, 4 * SIZE(AO6)
1009 LFD a6, 5 * SIZE(AO6)
1010 LFD a7, 6 * SIZE(AO6)
1011 LFD a8, 7 * SIZE(AO6)
/* column 6: y01..y16 += alpha6 * AO6[0..15] */
1013 FMADD y01, alpha6, a1, y01
1014 FMADD y02, alpha6, a2, y02
1015 FMADD y03, alpha6, a3, y03
1016 FMADD y04, alpha6, a4, y04
1018 LFD a1, 8 * SIZE(AO6)
1019 LFD a2, 9 * SIZE(AO6)
1020 LFD a3, 10 * SIZE(AO6)
1021 LFD a4, 11 * SIZE(AO6)
1023 FMADD y05, alpha6, a5, y05
1024 FMADD y06, alpha6, a6, y06
1025 FMADD y07, alpha6, a7, y07
1026 FMADD y08, alpha6, a8, y08
1028 LFD a5, 12 * SIZE(AO6)
1029 LFD a6, 13 * SIZE(AO6)
1030 LFD a7, 14 * SIZE(AO6)
1031 LFD a8, 15 * SIZE(AO6)
1033 FMADD y09, alpha6, a1, y09
1034 FMADD y10, alpha6, a2, y10
1035 FMADD y11, alpha6, a3, y11
1036 FMADD y12, alpha6, a4, y12
1038 LFD a1, 0 * SIZE(AO7)
1039 LFD a2, 1 * SIZE(AO7)
1040 LFD a3, 2 * SIZE(AO7)
1041 LFD a4, 3 * SIZE(AO7)
1043 FMADD y13, alpha6, a5, y13
1044 FMADD y14, alpha6, a6, y14
1045 FMADD y15, alpha6, a7, y15
1046 FMADD y16, alpha6, a8, y16
1048 LFD a5, 4 * SIZE(AO7)
1049 LFD a6, 5 * SIZE(AO7)
1050 LFD a7, 6 * SIZE(AO7)
1051 LFD a8, 7 * SIZE(AO7)
/* column 7: y01..y16 += alpha7 * AO7[0..15] */
1053 FMADD y01, alpha7, a1, y01
1054 FMADD y02, alpha7, a2, y02
1055 FMADD y03, alpha7, a3, y03
1056 FMADD y04, alpha7, a4, y04
1058 LFD a1, 8 * SIZE(AO7)
1059 LFD a2, 9 * SIZE(AO7)
1060 LFD a3, 10 * SIZE(AO7)
1061 LFD a4, 11 * SIZE(AO7)
1063 FMADD y05, alpha7, a5, y05
1064 FMADD y06, alpha7, a6, y06
1065 FMADD y07, alpha7, a7, y07
1066 FMADD y08, alpha7, a8, y08
1068 LFD a5, 12 * SIZE(AO7)
1069 LFD a6, 13 * SIZE(AO7)
1070 LFD a7, 14 * SIZE(AO7)
1071 LFD a8, 15 * SIZE(AO7)
1073 FMADD y09, alpha7, a1, y09
1074 FMADD y10, alpha7, a2, y10
1075 FMADD y11, alpha7, a3, y11
1076 FMADD y12, alpha7, a4, y12
1078 LFD a1, 0 * SIZE(AO8)
1079 LFD a2, 1 * SIZE(AO8)
1080 LFD a3, 2 * SIZE(AO8)
1081 LFD a4, 3 * SIZE(AO8)
1083 FMADD y13, alpha7, a5, y13
1084 FMADD y14, alpha7, a6, y14
1085 FMADD y15, alpha7, a7, y15
1086 FMADD y16, alpha7, a8, y16
1088 LFD a5, 4 * SIZE(AO8)
1089 LFD a6, 5 * SIZE(AO8)
1090 LFD a7, 6 * SIZE(AO8)
1091 LFD a8, 7 * SIZE(AO8)
/* column 8: y01..y16 += alpha8 * AO8[0..15] */
1093 FMADD y01, alpha8, a1, y01
1094 FMADD y02, alpha8, a2, y02
1095 FMADD y03, alpha8, a3, y03
1096 FMADD y04, alpha8, a4, y04
1098 LFD a1, 8 * SIZE(AO8)
1099 LFD a2, 9 * SIZE(AO8)
1100 LFD a3, 10 * SIZE(AO8)
1101 LFD a4, 11 * SIZE(AO8)
1103 FMADD y05, alpha8, a5, y05
1104 FMADD y06, alpha8, a6, y06
1105 FMADD y07, alpha8, a7, y07
1106 FMADD y08, alpha8, a8, y08
1108 LFD a5, 12 * SIZE(AO8)
1109 LFD a6, 13 * SIZE(AO8)
1110 LFD a7, 14 * SIZE(AO8)
1111 LFD a8, 15 * SIZE(AO8)
/* all 16 values of columns 5..8 are loaded: step AO5..AO8 */
1113 addi AO5, AO5, 16 * SIZE
1114 addi AO6, AO6, 16 * SIZE
1115 addi AO7, AO7, 16 * SIZE
1116 addi AO8, AO8, 16 * SIZE
1123 FMADD y09, alpha8, a1, y09
1124 FMADD y10, alpha8, a2, y10
1125 FMADD y11, alpha8, a3, y11
1126 FMADD y12, alpha8, a4, y12
/* AO1 has already been stepped, so the loads from AO1 below fetch */
/* the first eight column-1 values of the following tile. */
1128 LFD a1, 0 * SIZE(AO1)
1129 LFD a2, 1 * SIZE(AO1)
1130 LFD a3, 2 * SIZE(AO1)
1131 LFD a4, 3 * SIZE(AO1)
1133 FMADD y13, alpha8, a5, y13
1134 FMADD y14, alpha8, a6, y14
1135 FMADD y15, alpha8, a7, y15
1136 FMADD y16, alpha8, a8, y16
1138 LFD a5, 4 * SIZE(AO1)
1139 LFD a6, 5 * SIZE(AO1)
1140 LFD a7, 6 * SIZE(AO1)
1141 LFD a8, 7 * SIZE(AO1)
/* Store this tile's first four results (Y1[16..19]) and reuse */
/* y01..y04 for Y1[32..35]; then move Y1 on by one tile. */
1143 STFD y01, 16 * SIZE(Y1)
1144 STFD y02, 17 * SIZE(Y1)
1145 STFD y03, 18 * SIZE(Y1)
1146 STFD y04, 19 * SIZE(Y1)
1148 LFD y01, 32 * SIZE(Y1)
1149 LFD y02, 33 * SIZE(Y1)
1150 LFD y03, 34 * SIZE(Y1)
1151 LFD y04, 35 * SIZE(Y1)
1154 addi Y1, Y1, 16 * SIZE
/* Drain: store y05..y16 to Y1[4..15] (units of SIZE) and advance Y1 */
/* by 16. y01..y04 are not stored here. */
/* NOTE(review): presumably Y1[0..3] were already written by the */
/* preceding pipelined code -- confirm against the missing labels. */
1159 STFD y05, 4 * SIZE(Y1)
1160 STFD y06, 5 * SIZE(Y1)
1161 STFD y07, 6 * SIZE(Y1)
1162 STFD y08, 7 * SIZE(Y1)
1164 STFD y09, 8 * SIZE(Y1)
1165 STFD y10, 9 * SIZE(Y1)
1166 STFD y11, 10 * SIZE(Y1)
1167 STFD y12, 11 * SIZE(Y1)
1169 STFD y13, 12 * SIZE(Y1)
1170 STFD y14, 13 * SIZE(Y1)
1171 STFD y15, 14 * SIZE(Y1)
1172 STFD y16, 15 * SIZE(Y1)
1174 addi Y1, Y1, 16 * SIZE
/* 8-row by 8-column step (indices in units of SIZE): */
/* y01..y08 = Y1[0..7] + sum over K=1..8 of alphaK * AOK[0..7], */
/* written back to Y1[0..7]; AO1..AO8 and Y1 each advance by 8. */
/* Every FMADD is followed by a reload of the a-register it just */
/* consumed, taken from the same row of the next column. */
/* NOTE(review): presumably executed only when an 8-row remainder */
/* exists; the guarding test and branch are not visible -- confirm. */
1184 LFD y01, 0 * SIZE(Y1)
1185 LFD y02, 1 * SIZE(Y1)
1186 LFD y03, 2 * SIZE(Y1)
1187 LFD y04, 3 * SIZE(Y1)
1189 LFD a1, 0 * SIZE(AO1)
1190 LFD a2, 1 * SIZE(AO1)
1191 LFD a3, 2 * SIZE(AO1)
1192 LFD a4, 3 * SIZE(AO1)
1194 LFD y05, 4 * SIZE(Y1)
1195 LFD y06, 5 * SIZE(Y1)
1196 LFD y07, 6 * SIZE(Y1)
1197 LFD y08, 7 * SIZE(Y1)
1199 LFD a5, 4 * SIZE(AO1)
1200 LFD a6, 5 * SIZE(AO1)
1201 LFD a7, 6 * SIZE(AO1)
1202 LFD a8, 7 * SIZE(AO1)
/* column 1 (alpha1); a1..a8 reload from AO2 */
1204 FMADD y01, alpha1, a1, y01
1205 LFD a1, 0 * SIZE(AO2)
1206 FMADD y02, alpha1, a2, y02
1207 LFD a2, 1 * SIZE(AO2)
1208 FMADD y03, alpha1, a3, y03
1209 LFD a3, 2 * SIZE(AO2)
1210 FMADD y04, alpha1, a4, y04
1211 LFD a4, 3 * SIZE(AO2)
1213 FMADD y05, alpha1, a5, y05
1214 LFD a5, 4 * SIZE(AO2)
1215 FMADD y06, alpha1, a6, y06
1216 LFD a6, 5 * SIZE(AO2)
1217 FMADD y07, alpha1, a7, y07
1218 LFD a7, 6 * SIZE(AO2)
1219 FMADD y08, alpha1, a8, y08
1220 LFD a8, 7 * SIZE(AO2)
/* column 2 (alpha2); a1..a8 reload from AO3 */
1222 FMADD y01, alpha2, a1, y01
1223 LFD a1, 0 * SIZE(AO3)
1224 FMADD y02, alpha2, a2, y02
1225 LFD a2, 1 * SIZE(AO3)
1227 FMADD y03, alpha2, a3, y03
1228 LFD a3, 2 * SIZE(AO3)
1229 FMADD y04, alpha2, a4, y04
1230 LFD a4, 3 * SIZE(AO3)
1232 FMADD y05, alpha2, a5, y05
1233 LFD a5, 4 * SIZE(AO3)
1234 FMADD y06, alpha2, a6, y06
1235 LFD a6, 5 * SIZE(AO3)
1237 FMADD y07, alpha2, a7, y07
1238 LFD a7, 6 * SIZE(AO3)
1239 FMADD y08, alpha2, a8, y08
1240 LFD a8, 7 * SIZE(AO3)
/* column 3 (alpha3); a1..a8 reload from AO4 */
1242 FMADD y01, alpha3, a1, y01
1243 LFD a1, 0 * SIZE(AO4)
1244 FMADD y02, alpha3, a2, y02
1245 LFD a2, 1 * SIZE(AO4)
1247 FMADD y03, alpha3, a3, y03
1248 LFD a3, 2 * SIZE(AO4)
1249 FMADD y04, alpha3, a4, y04
1250 LFD a4, 3 * SIZE(AO4)
1252 FMADD y05, alpha3, a5, y05
1253 LFD a5, 4 * SIZE(AO4)
1254 FMADD y06, alpha3, a6, y06
1255 LFD a6, 5 * SIZE(AO4)
1257 FMADD y07, alpha3, a7, y07
1258 LFD a7, 6 * SIZE(AO4)
1259 FMADD y08, alpha3, a8, y08
1260 LFD a8, 7 * SIZE(AO4)
/* column 4 (alpha4); a1..a8 reload from AO5 */
1262 FMADD y01, alpha4, a1, y01
1263 LFD a1, 0 * SIZE(AO5)
1264 FMADD y02, alpha4, a2, y02
1265 LFD a2, 1 * SIZE(AO5)
1267 FMADD y03, alpha4, a3, y03
1268 LFD a3, 2 * SIZE(AO5)
1269 FMADD y04, alpha4, a4, y04
1270 LFD a4, 3 * SIZE(AO5)
1272 FMADD y05, alpha4, a5, y05
1273 LFD a5, 4 * SIZE(AO5)
1274 FMADD y06, alpha4, a6, y06
1275 LFD a6, 5 * SIZE(AO5)
1277 FMADD y07, alpha4, a7, y07
1278 LFD a7, 6 * SIZE(AO5)
1279 FMADD y08, alpha4, a8, y08
1280 LFD a8, 7 * SIZE(AO5)
/* column 5 (alpha5); a1..a8 reload from AO6 */
1282 FMADD y01, alpha5, a1, y01
1283 LFD a1, 0 * SIZE(AO6)
1284 FMADD y02, alpha5, a2, y02
1285 LFD a2, 1 * SIZE(AO6)
1287 FMADD y03, alpha5, a3, y03
1288 LFD a3, 2 * SIZE(AO6)
1289 FMADD y04, alpha5, a4, y04
1290 LFD a4, 3 * SIZE(AO6)
1292 FMADD y05, alpha5, a5, y05
1293 LFD a5, 4 * SIZE(AO6)
1294 FMADD y06, alpha5, a6, y06
1295 LFD a6, 5 * SIZE(AO6)
1297 FMADD y07, alpha5, a7, y07
1298 LFD a7, 6 * SIZE(AO6)
1299 FMADD y08, alpha5, a8, y08
1300 LFD a8, 7 * SIZE(AO6)
/* column 6 (alpha6); a1..a8 reload from AO7 */
1302 FMADD y01, alpha6, a1, y01
1303 LFD a1, 0 * SIZE(AO7)
1304 FMADD y02, alpha6, a2, y02
1305 LFD a2, 1 * SIZE(AO7)
1307 FMADD y03, alpha6, a3, y03
1308 LFD a3, 2 * SIZE(AO7)
1309 FMADD y04, alpha6, a4, y04
1310 LFD a4, 3 * SIZE(AO7)
1312 FMADD y05, alpha6, a5, y05
1313 LFD a5, 4 * SIZE(AO7)
1314 FMADD y06, alpha6, a6, y06
1315 LFD a6, 5 * SIZE(AO7)
1317 FMADD y07, alpha6, a7, y07
1318 LFD a7, 6 * SIZE(AO7)
1319 FMADD y08, alpha6, a8, y08
1320 LFD a8, 7 * SIZE(AO7)
/* column 7 (alpha7); a1..a8 reload from AO8 */
1322 FMADD y01, alpha7, a1, y01
1323 LFD a1, 0 * SIZE(AO8)
1324 FMADD y02, alpha7, a2, y02
1325 LFD a2, 1 * SIZE(AO8)
1327 FMADD y03, alpha7, a3, y03
1328 LFD a3, 2 * SIZE(AO8)
1329 FMADD y04, alpha7, a4, y04
1330 LFD a4, 3 * SIZE(AO8)
1332 FMADD y05, alpha7, a5, y05
1333 LFD a5, 4 * SIZE(AO8)
1334 FMADD y06, alpha7, a6, y06
1335 LFD a6, 5 * SIZE(AO8)
1337 FMADD y07, alpha7, a7, y07
1338 LFD a7, 6 * SIZE(AO8)
1339 FMADD y08, alpha7, a8, y08
1340 LFD a8, 7 * SIZE(AO8)
/* column 8 (alpha8); no more loads are needed, so the pointer */
/* advances are interleaved with the final FMADDs instead */
1342 FMADD y01, alpha8, a1, y01
1343 addi AO1, AO1, 8 * SIZE
1344 FMADD y02, alpha8, a2, y02
1345 addi AO2, AO2, 8 * SIZE
1346 FMADD y03, alpha8, a3, y03
1347 addi AO3, AO3, 8 * SIZE
1348 FMADD y04, alpha8, a4, y04
1349 addi AO4, AO4, 8 * SIZE
1351 STFD y01, 0 * SIZE(Y1)
1352 STFD y02, 1 * SIZE(Y1)
1353 STFD y03, 2 * SIZE(Y1)
1354 STFD y04, 3 * SIZE(Y1)
1356 FMADD y05, alpha8, a5, y05
1357 addi AO5, AO5, 8 * SIZE
1358 FMADD y06, alpha8, a6, y06
1359 addi AO6, AO6, 8 * SIZE
1360 FMADD y07, alpha8, a7, y07
1361 addi AO7, AO7, 8 * SIZE
1362 FMADD y08, alpha8, a8, y08
1363 addi AO8, AO8, 8 * SIZE
1365 STFD y05, 4 * SIZE(Y1)
1366 STFD y06, 5 * SIZE(Y1)
1367 STFD y07, 6 * SIZE(Y1)
1368 STFD y08, 7 * SIZE(Y1)
1369 addi Y1, Y1, 8 * SIZE
/* 4-row by 8-column step (indices in units of SIZE): */
/* y01..y04 = Y1[0..3] + sum over K=1..8 of alphaK * AOK[0..3], */
/* written back to Y1[0..3]; AO1..AO8 and Y1 each advance by 4. */
/* Two register banks alternate: a1..a4 carry the odd columns */
/* (AO1, AO3, AO5, AO7) and a5..a8 the even columns (AO2, AO4, AO6, */
/* AO8), so one bank is being reloaded while the other is consumed. */
/* NOTE(review): presumably executed only when a 4-row remainder */
/* exists; the guarding test and branch are not visible -- confirm. */
1376 LFD y01, 0 * SIZE(Y1)
1377 LFD y02, 1 * SIZE(Y1)
1378 LFD y03, 2 * SIZE(Y1)
1379 LFD y04, 3 * SIZE(Y1)
1381 LFD a1, 0 * SIZE(AO1)
1382 LFD a2, 1 * SIZE(AO1)
1383 LFD a3, 2 * SIZE(AO1)
1384 LFD a4, 3 * SIZE(AO1)
1386 LFD a5, 0 * SIZE(AO2)
1387 LFD a6, 1 * SIZE(AO2)
1388 LFD a7, 2 * SIZE(AO2)
1389 LFD a8, 3 * SIZE(AO2)
/* column 1 (alpha1, a1..a4); reload a1..a4 from AO3 */
1391 FMADD y01, alpha1, a1, y01
1392 LFD a1, 0 * SIZE(AO3)
1393 FMADD y02, alpha1, a2, y02
1394 LFD a2, 1 * SIZE(AO3)
1395 FMADD y03, alpha1, a3, y03
1396 LFD a3, 2 * SIZE(AO3)
1397 FMADD y04, alpha1, a4, y04
1398 LFD a4, 3 * SIZE(AO3)
/* column 2 (alpha2, a5..a8); reload a5..a8 from AO4 */
1400 FMADD y01, alpha2, a5, y01
1401 LFD a5, 0 * SIZE(AO4)
1402 FMADD y02, alpha2, a6, y02
1403 LFD a6, 1 * SIZE(AO4)
1404 FMADD y03, alpha2, a7, y03
1405 LFD a7, 2 * SIZE(AO4)
1406 FMADD y04, alpha2, a8, y04
1407 LFD a8, 3 * SIZE(AO4)
/* column 3 (alpha3, a1..a4); reload a1..a4 from AO5 */
1409 FMADD y01, alpha3, a1, y01
1410 LFD a1, 0 * SIZE(AO5)
1411 FMADD y02, alpha3, a2, y02
1412 LFD a2, 1 * SIZE(AO5)
1413 FMADD y03, alpha3, a3, y03
1414 LFD a3, 2 * SIZE(AO5)
1415 FMADD y04, alpha3, a4, y04
1416 LFD a4, 3 * SIZE(AO5)
/* column 4 (alpha4, a5..a8); reload a5..a8 from AO6 */
1418 FMADD y01, alpha4, a5, y01
1419 LFD a5, 0 * SIZE(AO6)
1420 FMADD y02, alpha4, a6, y02
1421 LFD a6, 1 * SIZE(AO6)
1422 FMADD y03, alpha4, a7, y03
1423 LFD a7, 2 * SIZE(AO6)
1424 FMADD y04, alpha4, a8, y04
1425 LFD a8, 3 * SIZE(AO6)
/* column 5 (alpha5, a1..a4); reload a1..a4 from AO7 */
1427 FMADD y01, alpha5, a1, y01
1428 LFD a1, 0 * SIZE(AO7)
1429 FMADD y02, alpha5, a2, y02
1430 LFD a2, 1 * SIZE(AO7)
1431 FMADD y03, alpha5, a3, y03
1432 LFD a3, 2 * SIZE(AO7)
1433 FMADD y04, alpha5, a4, y04
1434 LFD a4, 3 * SIZE(AO7)
/* column 6 (alpha6, a5..a8); reload a5..a8 from AO8 */
1436 FMADD y01, alpha6, a5, y01
1437 LFD a5, 0 * SIZE(AO8)
1438 FMADD y02, alpha6, a6, y02
1439 LFD a6, 1 * SIZE(AO8)
1440 FMADD y03, alpha6, a7, y03
1441 LFD a7, 2 * SIZE(AO8)
1442 FMADD y04, alpha6, a8, y04
1443 LFD a8, 3 * SIZE(AO8)
/* columns 7 and 8; pointer advances fill the slots where loads were */
1445 FMADD y01, alpha7, a1, y01
1446 addi AO1, AO1, 4 * SIZE
1447 FMADD y02, alpha7, a2, y02
1448 addi AO2, AO2, 4 * SIZE
1449 FMADD y03, alpha7, a3, y03
1450 addi AO3, AO3, 4 * SIZE
1451 FMADD y04, alpha7, a4, y04
1452 addi AO4, AO4, 4 * SIZE
1454 FMADD y01, alpha8, a5, y01
1455 addi AO5, AO5, 4 * SIZE
1456 FMADD y02, alpha8, a6, y02
1457 addi AO6, AO6, 4 * SIZE
1458 FMADD y03, alpha8, a7, y03
1459 addi AO7, AO7, 4 * SIZE
1460 FMADD y04, alpha8, a8, y04
1461 addi AO8, AO8, 4 * SIZE
1463 STFD y01, 0 * SIZE(Y1)
1464 STFD y02, 1 * SIZE(Y1)
1465 STFD y03, 2 * SIZE(Y1)
1466 STFD y04, 3 * SIZE(Y1)
1467 addi Y1, Y1, 4 * SIZE
/* 2-row by 8-column step (indices in units of SIZE): */
/* y01, y02 = Y1[0..1] + sum over K=1..8 of alphaK * AOK[0..1], */
/* written back to Y1[0..1]; AO1..AO8 and Y1 each advance by 2. */
/* Register pairs (a1,a2) (a3,a4) (a5,a6) (a7,a8) first hold columns */
/* 1..4 and are then reloaded with columns 5..8 as each is consumed. */
/* NOTE(review): presumably executed only when a 2-row remainder */
/* exists; the guarding test and branch are not visible -- confirm. */
1474 LFD y01, 0 * SIZE(Y1)
1475 LFD y02, 1 * SIZE(Y1)
1477 LFD a1, 0 * SIZE(AO1)
1478 LFD a2, 1 * SIZE(AO1)
1479 LFD a3, 0 * SIZE(AO2)
1480 LFD a4, 1 * SIZE(AO2)
1482 LFD a5, 0 * SIZE(AO3)
1483 LFD a6, 1 * SIZE(AO3)
1484 LFD a7, 0 * SIZE(AO4)
1485 LFD a8, 1 * SIZE(AO4)
/* columns 1..4, each pair reloaded from columns 5..8 */
1487 FMADD y01, alpha1, a1, y01
1488 LFD a1, 0 * SIZE(AO5)
1489 FMADD y02, alpha1, a2, y02
1490 LFD a2, 1 * SIZE(AO5)
1491 FMADD y01, alpha2, a3, y01
1492 LFD a3, 0 * SIZE(AO6)
1493 FMADD y02, alpha2, a4, y02
1494 LFD a4, 1 * SIZE(AO6)
1496 FMADD y01, alpha3, a5, y01
1497 LFD a5, 0 * SIZE(AO7)
1498 FMADD y02, alpha3, a6, y02
1499 LFD a6, 1 * SIZE(AO7)
1500 FMADD y01, alpha4, a7, y01
1501 LFD a7, 0 * SIZE(AO8)
1502 FMADD y02, alpha4, a8, y02
1503 LFD a8, 1 * SIZE(AO8)
/* columns 5..8, interleaved with the pointer advances */
1505 FMADD y01, alpha5, a1, y01
1506 addi AO1, AO1, 2 * SIZE
1507 FMADD y02, alpha5, a2, y02
1508 addi AO2, AO2, 2 * SIZE
1509 FMADD y01, alpha6, a3, y01
1510 addi AO3, AO3, 2 * SIZE
1511 FMADD y02, alpha6, a4, y02
1512 addi AO4, AO4, 2 * SIZE
1514 FMADD y01, alpha7, a5, y01
1515 addi AO5, AO5, 2 * SIZE
1516 FMADD y02, alpha7, a6, y02
1517 addi AO6, AO6, 2 * SIZE
1518 FMADD y01, alpha8, a7, y01
1519 addi AO7, AO7, 2 * SIZE
1520 FMADD y02, alpha8, a8, y02
1521 addi AO8, AO8, 2 * SIZE
1523 STFD y01, 0 * SIZE(Y1)
1524 STFD y02, 1 * SIZE(Y1)
1525 addi Y1, Y1, 2 * SIZE
/* Single-row by 8-column step: */
/* y01 = Y1[0] + sum over K=1..8 of alphaK * AOK[0], stored back to */
/* Y1[0]. The eight FMADDs form one serial dependency chain on y01. */
/* No pointer is advanced within this block. */
/* NOTE(review): presumably the last remainder case; the guarding */
/* test and branch are not visible -- confirm. */
1532 LFD y01, 0 * SIZE(Y1)
1534 LFD a1, 0 * SIZE(AO1)
1535 LFD a2, 0 * SIZE(AO2)
1536 LFD a3, 0 * SIZE(AO3)
1537 LFD a4, 0 * SIZE(AO4)
1538 LFD a5, 0 * SIZE(AO5)
1539 LFD a6, 0 * SIZE(AO6)
1540 LFD a7, 0 * SIZE(AO7)
1541 LFD a8, 0 * SIZE(AO8)
1543 FMADD y01, alpha1, a1, y01
1544 FMADD y01, alpha2, a2, y01
1545 FMADD y01, alpha3, a3, y01
1546 FMADD y01, alpha4, a4, y01
1548 FMADD y01, alpha5, a5, y01
1549 FMADD y01, alpha6, a6, y01
1550 FMADD y01, alpha7, a7, y01
1551 FMADD y01, alpha8, a8, y01
1553 STFD y01, 0 * SIZE(Y1)
/* Four-column variant of the coefficient setup: load four values */
/* through X into alpha1..alpha4 and pre-multiply each by alpha. */
/* Every load uses offset 0. */
/* NOTE(review): X is presumably stepped between the loads by */
/* instructions that are not visible here -- confirm. */
1570 LFD alpha1, 0 * SIZE(X)
1572 LFD alpha2, 0 * SIZE(X)
1574 LFD alpha3, 0 * SIZE(X)
1576 LFD alpha4, 0 * SIZE(X)
1579 FMUL alpha1, alpha, alpha1
1581 FMUL alpha2, alpha, alpha2
1583 FMUL alpha3, alpha, alpha3
1585 FMUL alpha4, alpha, alpha4
1592 LFD y01, 0 * SIZE(Y1)
1593 LFD y02, 1 * SIZE(Y1)
1594 LFD y03, 2 * SIZE(Y1)
1595 LFD y04, 3 * SIZE(Y1)
1596 LFD y05, 4 * SIZE(Y1)
1597 LFD y06, 5 * SIZE(Y1)
1598 LFD y07, 6 * SIZE(Y1)
1599 LFD y08, 7 * SIZE(Y1)
1600 LFD y09, 8 * SIZE(Y1)
1601 LFD y10, 9 * SIZE(Y1)
1602 LFD y11, 10 * SIZE(Y1)
1603 LFD y12, 11 * SIZE(Y1)
1604 LFD y13, 12 * SIZE(Y1)
1605 LFD y14, 13 * SIZE(Y1)
1606 LFD y15, 14 * SIZE(Y1)
1607 LFD y16, 15 * SIZE(Y1)
1609 LFD a1, 0 * SIZE(AO1)
1610 LFD a2, 1 * SIZE(AO1)
1611 LFD a3, 2 * SIZE(AO1)
1612 LFD a4, 3 * SIZE(AO1)
1613 LFD a5, 4 * SIZE(AO1)
1614 LFD a6, 5 * SIZE(AO1)
1615 LFD a7, 6 * SIZE(AO1)
1616 LFD a8, 7 * SIZE(AO1)
/* Four-stream path, software-pipelined 16-element block.             */
/* On entry y01..y16 hold Y1[0..15] and a1..a8 hold AO1[0..7] (they   */
/* are consumed before being written here). Each FMADD accumulates    */
/* alphaK * aN into yNN, and the LFD paired with it refills the same  */
/* aN register with the operand needed eight FMADDs later.            */
/* FMADD/LFD/STFD are macros defined outside this view; read here as  */
/* fused multiply-add / load / store.                                 */
/* NOTE(review): presumably a loop body -- the branch and label are   */
/* not visible in this listing; confirm against the full file.        */

/* alpha1 * AO1[0..7]; refill a1..a8 with AO1[8..15]. */
1621 FMADD y01, alpha1, a1, y01
1622 LFD a1, 8 * SIZE(AO1)
1623 FMADD y02, alpha1, a2, y02
1624 LFD a2, 9 * SIZE(AO1)
1625 FMADD y03, alpha1, a3, y03
1626 LFD a3, 10 * SIZE(AO1)
1627 FMADD y04, alpha1, a4, y04
1628 LFD a4, 11 * SIZE(AO1)
1630 FMADD y05, alpha1, a5, y05
1631 LFD a5, 12 * SIZE(AO1)
1632 FMADD y06, alpha1, a6, y06
1633 LFD a6, 13 * SIZE(AO1)
1634 FMADD y07, alpha1, a7, y07
1635 LFD a7, 14 * SIZE(AO1)
1636 FMADD y08, alpha1, a8, y08
1637 LFD a8, 15 * SIZE(AO1)
/* alpha1 * AO1[8..15]; refill with AO2[0..7]. */
1639 FMADD y09, alpha1, a1, y09
1640 LFD a1, 0 * SIZE(AO2)
1641 FMADD y10, alpha1, a2, y10
1642 LFD a2, 1 * SIZE(AO2)
1643 FMADD y11, alpha1, a3, y11
1644 LFD a3, 2 * SIZE(AO2)
1645 FMADD y12, alpha1, a4, y12
1646 LFD a4, 3 * SIZE(AO2)
1648 FMADD y13, alpha1, a5, y13
1649 LFD a5, 4 * SIZE(AO2)
1650 FMADD y14, alpha1, a6, y14
1651 LFD a6, 5 * SIZE(AO2)
1652 FMADD y15, alpha1, a7, y15
1653 LFD a7, 6 * SIZE(AO2)
1654 FMADD y16, alpha1, a8, y16
1655 LFD a8, 7 * SIZE(AO2)
/* alpha2 * AO2[0..7]; refill with AO2[8..15]. */
1657 FMADD y01, alpha2, a1, y01
1658 LFD a1, 8 * SIZE(AO2)
1659 FMADD y02, alpha2, a2, y02
1660 LFD a2, 9 * SIZE(AO2)
1661 FMADD y03, alpha2, a3, y03
1662 LFD a3, 10 * SIZE(AO2)
1663 FMADD y04, alpha2, a4, y04
1664 LFD a4, 11 * SIZE(AO2)
1666 FMADD y05, alpha2, a5, y05
1667 LFD a5, 12 * SIZE(AO2)
1668 FMADD y06, alpha2, a6, y06
1669 LFD a6, 13 * SIZE(AO2)
1670 FMADD y07, alpha2, a7, y07
1671 LFD a7, 14 * SIZE(AO2)
1672 FMADD y08, alpha2, a8, y08
1673 LFD a8, 15 * SIZE(AO2)
/* All 16 elements of AO1 and AO2 for this block are now in flight,   */
/* so both pointers move on; later 0..7 * SIZE(AO1) offsets therefore */
/* address the following 16-element group.                            */
1675 addi AO1, AO1, 16 * SIZE
1676 addi AO2, AO2, 16 * SIZE
/* alpha2 * AO2[8..15]; refill with AO3[0..7]. */
1680 FMADD y09, alpha2, a1, y09
1681 LFD a1, 0 * SIZE(AO3)
1682 FMADD y10, alpha2, a2, y10
1683 LFD a2, 1 * SIZE(AO3)
1684 FMADD y11, alpha2, a3, y11
1685 LFD a3, 2 * SIZE(AO3)
1686 FMADD y12, alpha2, a4, y12
1687 LFD a4, 3 * SIZE(AO3)
1689 FMADD y13, alpha2, a5, y13
1690 LFD a5, 4 * SIZE(AO3)
1691 FMADD y14, alpha2, a6, y14
1692 LFD a6, 5 * SIZE(AO3)
1693 FMADD y15, alpha2, a7, y15
1694 LFD a7, 6 * SIZE(AO3)
1695 FMADD y16, alpha2, a8, y16
1696 LFD a8, 7 * SIZE(AO3)
/* alpha3 * AO3[0..7]; refill with AO3[8..15]. */
1698 FMADD y01, alpha3, a1, y01
1699 LFD a1, 8 * SIZE(AO3)
1700 FMADD y02, alpha3, a2, y02
1701 LFD a2, 9 * SIZE(AO3)
1702 FMADD y03, alpha3, a3, y03
1703 LFD a3, 10 * SIZE(AO3)
1704 FMADD y04, alpha3, a4, y04
1705 LFD a4, 11 * SIZE(AO3)
1707 FMADD y05, alpha3, a5, y05
1708 LFD a5, 12 * SIZE(AO3)
1709 FMADD y06, alpha3, a6, y06
1710 LFD a6, 13 * SIZE(AO3)
1711 FMADD y07, alpha3, a7, y07
1712 LFD a7, 14 * SIZE(AO3)
1713 FMADD y08, alpha3, a8, y08
1714 LFD a8, 15 * SIZE(AO3)
/* alpha3 * AO3[8..15]; refill with AO4[0..7]. */
1716 FMADD y09, alpha3, a1, y09
1717 LFD a1, 0 * SIZE(AO4)
1718 FMADD y10, alpha3, a2, y10
1719 LFD a2, 1 * SIZE(AO4)
1720 FMADD y11, alpha3, a3, y11
1721 LFD a3, 2 * SIZE(AO4)
1722 FMADD y12, alpha3, a4, y12
1723 LFD a4, 3 * SIZE(AO4)
1725 FMADD y13, alpha3, a5, y13
1726 LFD a5, 4 * SIZE(AO4)
1727 FMADD y14, alpha3, a6, y14
1728 LFD a6, 5 * SIZE(AO4)
1729 FMADD y15, alpha3, a7, y15
1730 LFD a7, 6 * SIZE(AO4)
1731 FMADD y16, alpha3, a8, y16
1732 LFD a8, 7 * SIZE(AO4)
/* alpha4 * AO4[0..3] completes y01..y04; refill with AO4[8..11]. */
1734 FMADD y01, alpha4, a1, y01
1735 LFD a1, 8 * SIZE(AO4)
1736 FMADD y02, alpha4, a2, y02
1737 LFD a2, 9 * SIZE(AO4)
1738 FMADD y03, alpha4, a3, y03
1739 LFD a3, 10 * SIZE(AO4)
1740 FMADD y04, alpha4, a4, y04
1741 LFD a4, 11 * SIZE(AO4)
/* Write back the finished quad, then reuse the registers for the     */
/* matching Y1 elements of the following 16-element group (offsets    */
/* 16..19, Y1 not yet advanced).                                      */
1743 STFD y01, 0 * SIZE(Y1)
1744 STFD y02, 1 * SIZE(Y1)
1745 STFD y03, 2 * SIZE(Y1)
1746 STFD y04, 3 * SIZE(Y1)
1748 LFD y01, 16 * SIZE(Y1)
1749 LFD y02, 17 * SIZE(Y1)
1750 LFD y03, 18 * SIZE(Y1)
1751 LFD y04, 19 * SIZE(Y1)
/* alpha4 * AO4[4..7] completes y05..y08; refill with AO4[12..15]. */
1753 FMADD y05, alpha4, a5, y05
1754 LFD a5, 12 * SIZE(AO4)
1755 FMADD y06, alpha4, a6, y06
1756 LFD a6, 13 * SIZE(AO4)
1757 FMADD y07, alpha4, a7, y07
1758 LFD a7, 14 * SIZE(AO4)
1759 FMADD y08, alpha4, a8, y08
1760 LFD a8, 15 * SIZE(AO4)
1762 STFD y05, 4 * SIZE(Y1)
1763 STFD y06, 5 * SIZE(Y1)
1764 STFD y07, 6 * SIZE(Y1)
1765 STFD y08, 7 * SIZE(Y1)
1767 LFD y05, 20 * SIZE(Y1)
1768 LFD y06, 21 * SIZE(Y1)
1769 LFD y07, 22 * SIZE(Y1)
1770 LFD y08, 23 * SIZE(Y1)
/* AO3 and AO4 have had all 16 elements of this block requested. */
1772 addi AO3, AO3, 16 * SIZE
1773 addi AO4, AO4, 16 * SIZE
/* alpha4 * AO4[8..11] completes y09..y12; a1..a4 are refilled from   */
/* the already-advanced AO1, i.e. the next group's elements 0..3.     */
1777 FMADD y09, alpha4, a1, y09
1778 LFD a1, 0 * SIZE(AO1)
1779 FMADD y10, alpha4, a2, y10
1780 LFD a2, 1 * SIZE(AO1)
1781 FMADD y11, alpha4, a3, y11
1782 LFD a3, 2 * SIZE(AO1)
1783 FMADD y12, alpha4, a4, y12
1784 LFD a4, 3 * SIZE(AO1)
1786 STFD y09, 8 * SIZE(Y1)
1787 STFD y10, 9 * SIZE(Y1)
1788 STFD y11, 10 * SIZE(Y1)
1789 STFD y12, 11 * SIZE(Y1)
1791 LFD y09, 24 * SIZE(Y1)
1792 LFD y10, 25 * SIZE(Y1)
1793 LFD y11, 26 * SIZE(Y1)
1794 LFD y12, 27 * SIZE(Y1)
/* alpha4 * AO4[12..15] completes y13..y16; a5..a8 get the next       */
/* group's AO1 elements 4..7.                                         */
1796 FMADD y13, alpha4, a5, y13
1797 LFD a5, 4 * SIZE(AO1)
1798 FMADD y14, alpha4, a6, y14
1799 LFD a6, 5 * SIZE(AO1)
1800 FMADD y15, alpha4, a7, y15
1801 LFD a7, 6 * SIZE(AO1)
1802 FMADD y16, alpha4, a8, y16
1803 LFD a8, 7 * SIZE(AO1)
1805 STFD y13, 12 * SIZE(Y1)
1806 STFD y14, 13 * SIZE(Y1)
1807 STFD y15, 14 * SIZE(Y1)
1808 STFD y16, 15 * SIZE(Y1)
1810 LFD y13, 28 * SIZE(Y1)
1811 LFD y14, 29 * SIZE(Y1)
1812 LFD y15, 30 * SIZE(Y1)
1813 LFD y16, 31 * SIZE(Y1)
/* Y1 moves to the group whose values were just loaded into y01..y16. */
1815 addi Y1, Y1, 16 * SIZE
/* Four-stream path, 16-element block without look-ahead.             */
/* On entry y01..y16 already hold Y1[0..15] and a1..a8 hold           */
/* AO1[0..7] (both are read before being written here). Unlike a      */
/* pipelined block, nothing beyond offset 15 of Y1 or of any AOx is   */
/* touched, and all four AOx pointers are advanced only at the end.   */
/* FMADD/LFD/STFD are macros defined outside this view; read here as  */
/* fused multiply-add / load / store.                                 */

/* alpha1 * AO1[0..15]. */
1821 FMADD y01, alpha1, a1, y01
1822 LFD a1, 8 * SIZE(AO1)
1823 FMADD y02, alpha1, a2, y02
1824 LFD a2, 9 * SIZE(AO1)
1825 FMADD y03, alpha1, a3, y03
1826 LFD a3, 10 * SIZE(AO1)
1827 FMADD y04, alpha1, a4, y04
1828 LFD a4, 11 * SIZE(AO1)
1830 FMADD y05, alpha1, a5, y05
1831 LFD a5, 12 * SIZE(AO1)
1832 FMADD y06, alpha1, a6, y06
1833 LFD a6, 13 * SIZE(AO1)
1834 FMADD y07, alpha1, a7, y07
1835 LFD a7, 14 * SIZE(AO1)
1836 FMADD y08, alpha1, a8, y08
1837 LFD a8, 15 * SIZE(AO1)
1839 FMADD y09, alpha1, a1, y09
1840 LFD a1, 0 * SIZE(AO2)
1841 FMADD y10, alpha1, a2, y10
1842 LFD a2, 1 * SIZE(AO2)
1843 FMADD y11, alpha1, a3, y11
1844 LFD a3, 2 * SIZE(AO2)
1845 FMADD y12, alpha1, a4, y12
1846 LFD a4, 3 * SIZE(AO2)
1848 FMADD y13, alpha1, a5, y13
1849 LFD a5, 4 * SIZE(AO2)
1850 FMADD y14, alpha1, a6, y14
1851 LFD a6, 5 * SIZE(AO2)
1852 FMADD y15, alpha1, a7, y15
1853 LFD a7, 6 * SIZE(AO2)
1854 FMADD y16, alpha1, a8, y16
1855 LFD a8, 7 * SIZE(AO2)
/* alpha2 * AO2[0..15]. */
1857 FMADD y01, alpha2, a1, y01
1858 LFD a1, 8 * SIZE(AO2)
1859 FMADD y02, alpha2, a2, y02
1860 LFD a2, 9 * SIZE(AO2)
1861 FMADD y03, alpha2, a3, y03
1862 LFD a3, 10 * SIZE(AO2)
1863 FMADD y04, alpha2, a4, y04
1864 LFD a4, 11 * SIZE(AO2)
1866 FMADD y05, alpha2, a5, y05
1867 LFD a5, 12 * SIZE(AO2)
1868 FMADD y06, alpha2, a6, y06
1869 LFD a6, 13 * SIZE(AO2)
1870 FMADD y07, alpha2, a7, y07
1871 LFD a7, 14 * SIZE(AO2)
1872 FMADD y08, alpha2, a8, y08
1873 LFD a8, 15 * SIZE(AO2)
1875 FMADD y09, alpha2, a1, y09
1876 LFD a1, 0 * SIZE(AO3)
1877 FMADD y10, alpha2, a2, y10
1878 LFD a2, 1 * SIZE(AO3)
1879 FMADD y11, alpha2, a3, y11
1880 LFD a3, 2 * SIZE(AO3)
1881 FMADD y12, alpha2, a4, y12
1882 LFD a4, 3 * SIZE(AO3)
1884 FMADD y13, alpha2, a5, y13
1885 LFD a5, 4 * SIZE(AO3)
1886 FMADD y14, alpha2, a6, y14
1887 LFD a6, 5 * SIZE(AO3)
1888 FMADD y15, alpha2, a7, y15
1889 LFD a7, 6 * SIZE(AO3)
1890 FMADD y16, alpha2, a8, y16
1891 LFD a8, 7 * SIZE(AO3)
/* alpha3 * AO3[0..15]. */
1893 FMADD y01, alpha3, a1, y01
1894 LFD a1, 8 * SIZE(AO3)
1895 FMADD y02, alpha3, a2, y02
1896 LFD a2, 9 * SIZE(AO3)
1897 FMADD y03, alpha3, a3, y03
1898 LFD a3, 10 * SIZE(AO3)
1899 FMADD y04, alpha3, a4, y04
1900 LFD a4, 11 * SIZE(AO3)
1902 FMADD y05, alpha3, a5, y05
1903 LFD a5, 12 * SIZE(AO3)
1904 FMADD y06, alpha3, a6, y06
1905 LFD a6, 13 * SIZE(AO3)
1906 FMADD y07, alpha3, a7, y07
1907 LFD a7, 14 * SIZE(AO3)
1908 FMADD y08, alpha3, a8, y08
1909 LFD a8, 15 * SIZE(AO3)
1911 FMADD y09, alpha3, a1, y09
1912 LFD a1, 0 * SIZE(AO4)
1913 FMADD y10, alpha3, a2, y10
1914 LFD a2, 1 * SIZE(AO4)
1915 FMADD y11, alpha3, a3, y11
1916 LFD a3, 2 * SIZE(AO4)
1917 FMADD y12, alpha3, a4, y12
1918 LFD a4, 3 * SIZE(AO4)
1920 FMADD y13, alpha3, a5, y13
1921 LFD a5, 4 * SIZE(AO4)
1922 FMADD y14, alpha3, a6, y14
1923 LFD a6, 5 * SIZE(AO4)
1924 FMADD y15, alpha3, a7, y15
1925 LFD a7, 6 * SIZE(AO4)
1926 FMADD y16, alpha3, a8, y16
1927 LFD a8, 7 * SIZE(AO4)
/* alpha4 * AO4[0..15]. */
1929 FMADD y01, alpha4, a1, y01
1930 LFD a1, 8 * SIZE(AO4)
1931 FMADD y02, alpha4, a2, y02
1932 LFD a2, 9 * SIZE(AO4)
1933 FMADD y03, alpha4, a3, y03
1934 LFD a3, 10 * SIZE(AO4)
1935 FMADD y04, alpha4, a4, y04
1936 LFD a4, 11 * SIZE(AO4)
1938 FMADD y05, alpha4, a5, y05
1939 LFD a5, 12 * SIZE(AO4)
1940 FMADD y06, alpha4, a6, y06
1941 LFD a6, 13 * SIZE(AO4)
1942 FMADD y07, alpha4, a7, y07
1943 LFD a7, 14 * SIZE(AO4)
1944 FMADD y08, alpha4, a8, y08
1945 LFD a8, 15 * SIZE(AO4)
/* No more loads are needed, so the pointer bumps take the slots      */
/* between the last FMADDs.                                           */
1947 FMADD y09, alpha4, a1, y09
1948 addi AO1, AO1, 16 * SIZE
1949 FMADD y10, alpha4, a2, y10
1950 addi AO2, AO2, 16 * SIZE
1951 FMADD y11, alpha4, a3, y11
1952 addi AO3, AO3, 16 * SIZE
1953 FMADD y12, alpha4, a4, y12
1954 addi AO4, AO4, 16 * SIZE
1956 FMADD y13, alpha4, a5, y13
1957 FMADD y14, alpha4, a6, y14
1958 FMADD y15, alpha4, a7, y15
1959 FMADD y16, alpha4, a8, y16
/* Write back all 16 results and step Y1 past them. */
1961 STFD y01, 0 * SIZE(Y1)
1962 STFD y02, 1 * SIZE(Y1)
1963 STFD y03, 2 * SIZE(Y1)
1964 STFD y04, 3 * SIZE(Y1)
1965 STFD y05, 4 * SIZE(Y1)
1966 STFD y06, 5 * SIZE(Y1)
1967 STFD y07, 6 * SIZE(Y1)
1968 STFD y08, 7 * SIZE(Y1)
1969 STFD y09, 8 * SIZE(Y1)
1970 STFD y10, 9 * SIZE(Y1)
1971 STFD y11, 10 * SIZE(Y1)
1972 STFD y12, 11 * SIZE(Y1)
1973 STFD y13, 12 * SIZE(Y1)
1974 STFD y14, 13 * SIZE(Y1)
1975 STFD y15, 14 * SIZE(Y1)
1976 STFD y16, 15 * SIZE(Y1)
1977 addi Y1, Y1, 16 * SIZE
/* Four-stream path, 8-element step: Y1[0..7] accumulates             */
/* alpha1*AO1[i] + alpha2*AO2[i] + alpha3*AO3[i] + alpha4*AO4[i].     */
/* a1..a8 are reused for each stream in turn: every FMADD is paired   */
/* with the load of the same-index element of the next stream.        */
/* NOTE(review): presumably the remainder step taken when 8 elements  */
/* are left; the guarding test/branch is not visible in this listing. */
1987 LFD y01, 0 * SIZE(Y1)
1988 LFD y02, 1 * SIZE(Y1)
1989 LFD y03, 2 * SIZE(Y1)
1990 LFD y04, 3 * SIZE(Y1)
1991 LFD y05, 4 * SIZE(Y1)
1992 LFD y06, 5 * SIZE(Y1)
1993 LFD y07, 6 * SIZE(Y1)
1994 LFD y08, 7 * SIZE(Y1)
1996 LFD a1, 0 * SIZE(AO1)
1997 LFD a2, 1 * SIZE(AO1)
1998 LFD a3, 2 * SIZE(AO1)
1999 LFD a4, 3 * SIZE(AO1)
2000 LFD a5, 4 * SIZE(AO1)
2001 LFD a6, 5 * SIZE(AO1)
2002 LFD a7, 6 * SIZE(AO1)
2003 LFD a8, 7 * SIZE(AO1)
/* alpha1 * AO1; refill from AO2. */
2005 FMADD y01, alpha1, a1, y01
2006 LFD a1, 0 * SIZE(AO2)
2007 FMADD y02, alpha1, a2, y02
2008 LFD a2, 1 * SIZE(AO2)
2009 FMADD y03, alpha1, a3, y03
2010 LFD a3, 2 * SIZE(AO2)
2011 FMADD y04, alpha1, a4, y04
2012 LFD a4, 3 * SIZE(AO2)
2014 FMADD y05, alpha1, a5, y05
2015 LFD a5, 4 * SIZE(AO2)
2016 FMADD y06, alpha1, a6, y06
2017 LFD a6, 5 * SIZE(AO2)
2018 FMADD y07, alpha1, a7, y07
2019 LFD a7, 6 * SIZE(AO2)
2020 FMADD y08, alpha1, a8, y08
2021 LFD a8, 7 * SIZE(AO2)
/* alpha2 * AO2; refill from AO3. */
2023 FMADD y01, alpha2, a1, y01
2024 LFD a1, 0 * SIZE(AO3)
2025 FMADD y02, alpha2, a2, y02
2026 LFD a2, 1 * SIZE(AO3)
2027 FMADD y03, alpha2, a3, y03
2028 LFD a3, 2 * SIZE(AO3)
2029 FMADD y04, alpha2, a4, y04
2030 LFD a4, 3 * SIZE(AO3)
2031 FMADD y05, alpha2, a5, y05
2032 LFD a5, 4 * SIZE(AO3)
2033 FMADD y06, alpha2, a6, y06
2034 LFD a6, 5 * SIZE(AO3)
2035 FMADD y07, alpha2, a7, y07
2036 LFD a7, 6 * SIZE(AO3)
2037 FMADD y08, alpha2, a8, y08
2038 LFD a8, 7 * SIZE(AO3)
/* alpha3 * AO3; refill from AO4. */
2040 FMADD y01, alpha3, a1, y01
2041 LFD a1, 0 * SIZE(AO4)
2042 FMADD y02, alpha3, a2, y02
2043 LFD a2, 1 * SIZE(AO4)
2044 FMADD y03, alpha3, a3, y03
2045 LFD a3, 2 * SIZE(AO4)
2046 FMADD y04, alpha3, a4, y04
2047 LFD a4, 3 * SIZE(AO4)
2049 FMADD y05, alpha3, a5, y05
2050 LFD a5, 4 * SIZE(AO4)
2051 FMADD y06, alpha3, a6, y06
2052 LFD a6, 5 * SIZE(AO4)
2053 FMADD y07, alpha3, a7, y07
2054 LFD a7, 6 * SIZE(AO4)
2055 FMADD y08, alpha3, a8, y08
2056 LFD a8, 7 * SIZE(AO4)
/* alpha4 * AO4; all loads are done, so the pointer bumps fill the    */
/* slots between FMADDs.                                              */
2058 FMADD y01, alpha4, a1, y01
2059 addi AO1, AO1, 8 * SIZE
2060 FMADD y02, alpha4, a2, y02
2061 addi AO2, AO2, 8 * SIZE
2062 FMADD y03, alpha4, a3, y03
2063 addi AO3, AO3, 8 * SIZE
2064 FMADD y04, alpha4, a4, y04
2065 addi AO4, AO4, 8 * SIZE
2067 STFD y01, 0 * SIZE(Y1)
2068 STFD y02, 1 * SIZE(Y1)
2069 STFD y03, 2 * SIZE(Y1)
2070 STFD y04, 3 * SIZE(Y1)
2072 FMADD y05, alpha4, a5, y05
2073 FMADD y06, alpha4, a6, y06
2074 FMADD y07, alpha4, a7, y07
2075 FMADD y08, alpha4, a8, y08
2077 STFD y05, 4 * SIZE(Y1)
2078 STFD y06, 5 * SIZE(Y1)
2079 STFD y07, 6 * SIZE(Y1)
2080 STFD y08, 7 * SIZE(Y1)
2081 addi Y1, Y1, 8 * SIZE
/* Four-stream path, 4-element step: Y1[0..3] accumulates the         */
/* alpha-weighted elements 0..3 of AO1..AO4. a1..a4 carry AO1 then    */
/* AO3, a5..a8 carry AO2 then AO4, so two streams are in registers    */
/* at any time.                                                       */
/* NOTE(review): presumably the remainder step for 4 leftover         */
/* elements; the guarding test/branch is not visible in this listing. */
2088 LFD y01, 0 * SIZE(Y1)
2089 LFD y02, 1 * SIZE(Y1)
2090 LFD y03, 2 * SIZE(Y1)
2091 LFD y04, 3 * SIZE(Y1)
2093 LFD a1, 0 * SIZE(AO1)
2094 LFD a2, 1 * SIZE(AO1)
2095 LFD a3, 2 * SIZE(AO1)
2096 LFD a4, 3 * SIZE(AO1)
2098 LFD a5, 0 * SIZE(AO2)
2099 LFD a6, 1 * SIZE(AO2)
2100 LFD a7, 2 * SIZE(AO2)
2101 LFD a8, 3 * SIZE(AO2)
/* alpha1 * AO1; refill a1..a4 from AO3. */
2103 FMADD y01, alpha1, a1, y01
2104 LFD a1, 0 * SIZE(AO3)
2105 FMADD y02, alpha1, a2, y02
2106 LFD a2, 1 * SIZE(AO3)
2107 FMADD y03, alpha1, a3, y03
2108 LFD a3, 2 * SIZE(AO3)
2109 FMADD y04, alpha1, a4, y04
2110 LFD a4, 3 * SIZE(AO3)
/* alpha2 * AO2; refill a5..a8 from AO4. */
2112 FMADD y01, alpha2, a5, y01
2113 LFD a5, 0 * SIZE(AO4)
2114 FMADD y02, alpha2, a6, y02
2115 LFD a6, 1 * SIZE(AO4)
2116 FMADD y03, alpha2, a7, y03
2117 LFD a7, 2 * SIZE(AO4)
2118 FMADD y04, alpha2, a8, y04
2119 LFD a8, 3 * SIZE(AO4)
/* alpha3 * AO3, with the pointer bumps interleaved. */
2121 FMADD y01, alpha3, a1, y01
2122 addi AO1, AO1, 4 * SIZE
2123 FMADD y02, alpha3, a2, y02
2124 addi AO2, AO2, 4 * SIZE
2125 FMADD y03, alpha3, a3, y03
2126 addi AO3, AO3, 4 * SIZE
2127 FMADD y04, alpha3, a4, y04
2128 addi AO4, AO4, 4 * SIZE
/* alpha4 * AO4. */
2130 FMADD y01, alpha4, a5, y01
2131 FMADD y02, alpha4, a6, y02
2132 FMADD y03, alpha4, a7, y03
2133 FMADD y04, alpha4, a8, y04
2135 STFD y01, 0 * SIZE(Y1)
2136 STFD y02, 1 * SIZE(Y1)
2137 STFD y03, 2 * SIZE(Y1)
2138 STFD y04, 3 * SIZE(Y1)
2139 addi Y1, Y1, 4 * SIZE
/* Four-stream path, 2-element step: everything fits in registers at  */
/* once (a1,a2 = AO1; a3,a4 = AO2; a5,a6 = AO3; a7,a8 = AO4), so all  */
/* loads are issued up front and Y1[0..1] is updated afterwards.      */
/* NOTE(review): presumably the remainder step for 2 leftover         */
/* elements; the guarding test/branch is not visible in this listing. */
2146 LFD y01, 0 * SIZE(Y1)
2147 LFD y02, 1 * SIZE(Y1)
2149 LFD a1, 0 * SIZE(AO1)
2150 LFD a2, 1 * SIZE(AO1)
2151 LFD a3, 0 * SIZE(AO2)
2152 LFD a4, 1 * SIZE(AO2)
2154 LFD a5, 0 * SIZE(AO3)
2155 LFD a6, 1 * SIZE(AO3)
2156 LFD a7, 0 * SIZE(AO4)
2157 LFD a8, 1 * SIZE(AO4)
/* Pointer bumps are interleaved with the first four FMADDs. */
2159 FMADD y01, alpha1, a1, y01
2160 addi AO1, AO1, 2 * SIZE
2161 FMADD y02, alpha1, a2, y02
2162 addi AO2, AO2, 2 * SIZE
2163 FMADD y01, alpha2, a3, y01
2164 addi AO3, AO3, 2 * SIZE
2165 FMADD y02, alpha2, a4, y02
2166 addi AO4, AO4, 2 * SIZE
2168 FMADD y01, alpha3, a5, y01
2169 FMADD y02, alpha3, a6, y02
2170 FMADD y01, alpha4, a7, y01
2171 FMADD y02, alpha4, a8, y02
2173 STFD y01, 0 * SIZE(Y1)
2174 STFD y02, 1 * SIZE(Y1)
2175 addi Y1, Y1, 2 * SIZE
/* Four-stream path, single-element step:                             */
/* Y1[0] += alpha1*AO1[0] + alpha2*AO2[0] + alpha3*AO3[0]             */
/*        + alpha4*AO4[0].                                            */
/* No pointer is advanced in the visible lines.                       */
2182 LFD y01, 0 * SIZE(Y1)
2184 LFD a1, 0 * SIZE(AO1)
2185 LFD a2, 0 * SIZE(AO2)
2186 LFD a3, 0 * SIZE(AO3)
2187 LFD a4, 0 * SIZE(AO4)
/* The four FMADDs form a serial dependency chain through y01. */
2189 FMADD y01, alpha1, a1, y01
2190 FMADD y01, alpha2, a2, y01
2191 FMADD y01, alpha3, a3, y01
2192 FMADD y01, alpha4, a4, y01
2194 STFD y01, 0 * SIZE(Y1)
/* Two-stream setup: read two values through X into alpha1/alpha2     */
/* and multiply each by the scalar alpha.                             */
/* NOTE(review): both loads use 0 * SIZE(X) and the inner listing     */
/* numbers skip between them, so the X pointer update is presumably   */
/* on a line not visible here -- confirm against the full file.       */
2203 LFD alpha1, 0 * SIZE(X)
2205 LFD alpha2, 0 * SIZE(X)
2208 FMUL alpha1, alpha, alpha1
2209 FMUL alpha2, alpha, alpha2
/* Two-stream path, 16-element preload: bring Y1[0..15] into          */
/* y01..y16 and AO1[0..7] into a1..a8 ahead of the FMADD sequence     */
/* that consumes them.                                                */
/* (LFD is a macro defined outside this view; read here as a load.)   */
2222 LFD y01, 0 * SIZE(Y1)
2223 LFD y02, 1 * SIZE(Y1)
2224 LFD y03, 2 * SIZE(Y1)
2225 LFD y04, 3 * SIZE(Y1)
2226 LFD y05, 4 * SIZE(Y1)
2227 LFD y06, 5 * SIZE(Y1)
2228 LFD y07, 6 * SIZE(Y1)
2229 LFD y08, 7 * SIZE(Y1)
2230 LFD y09, 8 * SIZE(Y1)
2231 LFD y10, 9 * SIZE(Y1)
2232 LFD y11, 10 * SIZE(Y1)
2233 LFD y12, 11 * SIZE(Y1)
2234 LFD y13, 12 * SIZE(Y1)
2235 LFD y14, 13 * SIZE(Y1)
2236 LFD y15, 14 * SIZE(Y1)
2237 LFD y16, 15 * SIZE(Y1)
/* First half (elements 0..7) of the AO1 stream. */
2239 LFD a1, 0 * SIZE(AO1)
2240 LFD a2, 1 * SIZE(AO1)
2241 LFD a3, 2 * SIZE(AO1)
2242 LFD a4, 3 * SIZE(AO1)
2243 LFD a5, 4 * SIZE(AO1)
2244 LFD a6, 5 * SIZE(AO1)
2245 LFD a7, 6 * SIZE(AO1)
2246 LFD a8, 7 * SIZE(AO1)
/* Two-stream path, software-pipelined 16-element block.              */
/* On entry y01..y16 hold Y1[0..15] and a1..a8 hold AO1[0..7] (they   */
/* are consumed before being written here). Each FMADD accumulates    */
/* alphaK * aN into yNN while the paired LFD refills aN with the      */
/* operand needed eight FMADDs later.                                 */
/* FMADD/LFD/STFD are macros defined outside this view; read here as  */
/* fused multiply-add / load / store.                                 */
/* NOTE(review): presumably a loop body -- the branch and label are   */
/* not visible in this listing; confirm against the full file.        */

/* alpha1 * AO1[0..7]; refill with AO1[8..15]. */
2251 FMADD y01, alpha1, a1, y01
2252 LFD a1, 8 * SIZE(AO1)
2253 FMADD y02, alpha1, a2, y02
2254 LFD a2, 9 * SIZE(AO1)
2255 FMADD y03, alpha1, a3, y03
2256 LFD a3, 10 * SIZE(AO1)
2257 FMADD y04, alpha1, a4, y04
2258 LFD a4, 11 * SIZE(AO1)
2260 FMADD y05, alpha1, a5, y05
2261 LFD a5, 12 * SIZE(AO1)
2262 FMADD y06, alpha1, a6, y06
2263 LFD a6, 13 * SIZE(AO1)
2264 FMADD y07, alpha1, a7, y07
2265 LFD a7, 14 * SIZE(AO1)
2266 FMADD y08, alpha1, a8, y08
2267 LFD a8, 15 * SIZE(AO1)
/* alpha1 * AO1[8..15]; refill with AO2[0..7]. */
2269 FMADD y09, alpha1, a1, y09
2270 LFD a1, 0 * SIZE(AO2)
2271 FMADD y10, alpha1, a2, y10
2272 LFD a2, 1 * SIZE(AO2)
2273 FMADD y11, alpha1, a3, y11
2274 LFD a3, 2 * SIZE(AO2)
2275 FMADD y12, alpha1, a4, y12
2276 LFD a4, 3 * SIZE(AO2)
2278 FMADD y13, alpha1, a5, y13
2279 LFD a5, 4 * SIZE(AO2)
2280 FMADD y14, alpha1, a6, y14
2281 LFD a6, 5 * SIZE(AO2)
2282 FMADD y15, alpha1, a7, y15
2283 LFD a7, 6 * SIZE(AO2)
2284 FMADD y16, alpha1, a8, y16
2285 LFD a8, 7 * SIZE(AO2)
/* alpha2 * AO2[0..7]; refill with AO2[8..15]. */
2287 FMADD y01, alpha2, a1, y01
2288 LFD a1, 8 * SIZE(AO2)
2289 FMADD y02, alpha2, a2, y02
2290 LFD a2, 9 * SIZE(AO2)
2291 FMADD y03, alpha2, a3, y03
2292 LFD a3, 10 * SIZE(AO2)
2293 FMADD y04, alpha2, a4, y04
2294 LFD a4, 11 * SIZE(AO2)
2296 FMADD y05, alpha2, a5, y05
2297 LFD a5, 12 * SIZE(AO2)
2298 FMADD y06, alpha2, a6, y06
2299 LFD a6, 13 * SIZE(AO2)
2300 FMADD y07, alpha2, a7, y07
2301 LFD a7, 14 * SIZE(AO2)
2302 FMADD y08, alpha2, a8, y08
2303 LFD a8, 15 * SIZE(AO2)
/* alpha2 * AO2[8..15]; AO1 has not been advanced yet, so offsets     */
/* 16..23 fetch elements 0..7 of the following 16-element group.      */
2305 FMADD y09, alpha2, a1, y09
2306 LFD a1, 16 * SIZE(AO1)
2307 FMADD y10, alpha2, a2, y10
2308 LFD a2, 17 * SIZE(AO1)
2309 FMADD y11, alpha2, a3, y11
2310 LFD a3, 18 * SIZE(AO1)
2311 FMADD y12, alpha2, a4, y12
2312 LFD a4, 19 * SIZE(AO1)
2314 FMADD y13, alpha2, a5, y13
2315 LFD a5, 20 * SIZE(AO1)
2316 FMADD y14, alpha2, a6, y14
2317 LFD a6, 21 * SIZE(AO1)
2318 FMADD y15, alpha2, a7, y15
2319 LFD a7, 22 * SIZE(AO1)
2320 FMADD y16, alpha2, a8, y16
2321 LFD a8, 23 * SIZE(AO1)
/* Write back each finished quad of results, then reload the same     */
/* registers with the Y1 values 16 elements further on.               */
2323 STFD y01, 0 * SIZE(Y1)
2324 STFD y02, 1 * SIZE(Y1)
2325 STFD y03, 2 * SIZE(Y1)
2326 STFD y04, 3 * SIZE(Y1)
2328 LFD y01, 16 * SIZE(Y1)
2329 LFD y02, 17 * SIZE(Y1)
2330 LFD y03, 18 * SIZE(Y1)
2331 LFD y04, 19 * SIZE(Y1)
2333 STFD y05, 4 * SIZE(Y1)
2334 STFD y06, 5 * SIZE(Y1)
2335 STFD y07, 6 * SIZE(Y1)
2336 STFD y08, 7 * SIZE(Y1)
2338 LFD y05, 20 * SIZE(Y1)
2339 LFD y06, 21 * SIZE(Y1)
2340 LFD y07, 22 * SIZE(Y1)
2341 LFD y08, 23 * SIZE(Y1)
2343 STFD y09, 8 * SIZE(Y1)
2344 STFD y10, 9 * SIZE(Y1)
2345 STFD y11, 10 * SIZE(Y1)
2346 STFD y12, 11 * SIZE(Y1)
2348 LFD y09, 24 * SIZE(Y1)
2349 LFD y10, 25 * SIZE(Y1)
2350 LFD y11, 26 * SIZE(Y1)
2351 LFD y12, 27 * SIZE(Y1)
2353 STFD y13, 12 * SIZE(Y1)
2354 STFD y14, 13 * SIZE(Y1)
2355 STFD y15, 14 * SIZE(Y1)
2356 STFD y16, 15 * SIZE(Y1)
2358 LFD y13, 28 * SIZE(Y1)
2359 LFD y14, 29 * SIZE(Y1)
2360 LFD y15, 30 * SIZE(Y1)
2361 LFD y16, 31 * SIZE(Y1)
/* Step all three pointers to the group whose data is now in flight. */
2363 addi AO1, AO1, 16 * SIZE
2364 addi AO2, AO2, 16 * SIZE
2365 addi Y1, Y1, 16 * SIZE
/* Two-stream path, 16-element block without look-ahead.              */
/* On entry y01..y16 already hold Y1[0..15] and a1..a8 hold           */
/* AO1[0..7] (both are read before being written here). No offset     */
/* beyond 15 is touched on Y1, AO1 or AO2.                            */
/* FMADD/LFD/STFD are macros defined outside this view; read here as  */
/* fused multiply-add / load / store.                                 */

/* alpha1 * AO1[0..15]. */
2375 FMADD y01, alpha1, a1, y01
2376 LFD a1, 8 * SIZE(AO1)
2377 FMADD y02, alpha1, a2, y02
2378 LFD a2, 9 * SIZE(AO1)
2379 FMADD y03, alpha1, a3, y03
2380 LFD a3, 10 * SIZE(AO1)
2381 FMADD y04, alpha1, a4, y04
2382 LFD a4, 11 * SIZE(AO1)
2384 FMADD y05, alpha1, a5, y05
2385 LFD a5, 12 * SIZE(AO1)
2386 FMADD y06, alpha1, a6, y06
2387 LFD a6, 13 * SIZE(AO1)
2388 FMADD y07, alpha1, a7, y07
2389 LFD a7, 14 * SIZE(AO1)
2390 FMADD y08, alpha1, a8, y08
2391 LFD a8, 15 * SIZE(AO1)
2393 FMADD y09, alpha1, a1, y09
2394 LFD a1, 0 * SIZE(AO2)
2395 FMADD y10, alpha1, a2, y10
2396 LFD a2, 1 * SIZE(AO2)
2397 FMADD y11, alpha1, a3, y11
2398 LFD a3, 2 * SIZE(AO2)
2399 FMADD y12, alpha1, a4, y12
2400 LFD a4, 3 * SIZE(AO2)
2402 FMADD y13, alpha1, a5, y13
2403 LFD a5, 4 * SIZE(AO2)
2404 FMADD y14, alpha1, a6, y14
2405 LFD a6, 5 * SIZE(AO2)
2406 FMADD y15, alpha1, a7, y15
2407 LFD a7, 6 * SIZE(AO2)
2408 FMADD y16, alpha1, a8, y16
2409 LFD a8, 7 * SIZE(AO2)
/* alpha2 * AO2[0..15]. */
2411 FMADD y01, alpha2, a1, y01
2412 LFD a1, 8 * SIZE(AO2)
2413 FMADD y02, alpha2, a2, y02
2414 LFD a2, 9 * SIZE(AO2)
2415 FMADD y03, alpha2, a3, y03
2416 LFD a3, 10 * SIZE(AO2)
2417 FMADD y04, alpha2, a4, y04
2418 LFD a4, 11 * SIZE(AO2)
2420 FMADD y05, alpha2, a5, y05
2421 LFD a5, 12 * SIZE(AO2)
2422 FMADD y06, alpha2, a6, y06
2423 LFD a6, 13 * SIZE(AO2)
2424 FMADD y07, alpha2, a7, y07
2425 LFD a7, 14 * SIZE(AO2)
2426 FMADD y08, alpha2, a8, y08
2427 LFD a8, 15 * SIZE(AO2)
2429 FMADD y09, alpha2, a1, y09
2430 FMADD y10, alpha2, a2, y10
2431 FMADD y11, alpha2, a3, y11
2432 FMADD y12, alpha2, a4, y12
2433 FMADD y13, alpha2, a5, y13
2434 FMADD y14, alpha2, a6, y14
2435 FMADD y15, alpha2, a7, y15
2436 FMADD y16, alpha2, a8, y16
/* Write back all 16 results, then step the pointers past them. */
2438 STFD y01, 0 * SIZE(Y1)
2439 STFD y02, 1 * SIZE(Y1)
2440 STFD y03, 2 * SIZE(Y1)
2441 STFD y04, 3 * SIZE(Y1)
2442 STFD y05, 4 * SIZE(Y1)
2443 STFD y06, 5 * SIZE(Y1)
2444 STFD y07, 6 * SIZE(Y1)
2445 STFD y08, 7 * SIZE(Y1)
2446 STFD y09, 8 * SIZE(Y1)
2447 STFD y10, 9 * SIZE(Y1)
2448 STFD y11, 10 * SIZE(Y1)
2449 STFD y12, 11 * SIZE(Y1)
2450 STFD y13, 12 * SIZE(Y1)
2451 STFD y14, 13 * SIZE(Y1)
2452 STFD y15, 14 * SIZE(Y1)
2453 STFD y16, 15 * SIZE(Y1)
2455 addi AO1, AO1, 16 * SIZE
2456 addi AO2, AO2, 16 * SIZE
2457 addi Y1, Y1, 16 * SIZE
/* Two-stream path, 8-element step:                                   */
/* Y1[i] += alpha1*AO1[i] + alpha2*AO2[i] for i = 0..7.               */
/* a1..a8 hold AO1 first and are refilled from AO2 as each alpha1     */
/* FMADD issues.                                                      */
/* NOTE(review): presumably the remainder step for 8 leftover         */
/* elements; the guarding test/branch is not visible in this listing. */
2467 LFD y01, 0 * SIZE(Y1)
2468 LFD y02, 1 * SIZE(Y1)
2469 LFD y03, 2 * SIZE(Y1)
2470 LFD y04, 3 * SIZE(Y1)
2471 LFD y05, 4 * SIZE(Y1)
2472 LFD y06, 5 * SIZE(Y1)
2473 LFD y07, 6 * SIZE(Y1)
2474 LFD y08, 7 * SIZE(Y1)
2476 LFD a1, 0 * SIZE(AO1)
2477 LFD a2, 1 * SIZE(AO1)
2478 LFD a3, 2 * SIZE(AO1)
2479 LFD a4, 3 * SIZE(AO1)
2480 LFD a5, 4 * SIZE(AO1)
2481 LFD a6, 5 * SIZE(AO1)
2482 LFD a7, 6 * SIZE(AO1)
2483 LFD a8, 7 * SIZE(AO1)
/* alpha1 * AO1; refill from AO2. */
2485 FMADD y01, alpha1, a1, y01
2486 LFD a1, 0 * SIZE(AO2)
2487 FMADD y02, alpha1, a2, y02
2488 LFD a2, 1 * SIZE(AO2)
2489 FMADD y03, alpha1, a3, y03
2490 LFD a3, 2 * SIZE(AO2)
2491 FMADD y04, alpha1, a4, y04
2492 LFD a4, 3 * SIZE(AO2)
2493 FMADD y05, alpha1, a5, y05
2494 LFD a5, 4 * SIZE(AO2)
2495 FMADD y06, alpha1, a6, y06
2496 LFD a6, 5 * SIZE(AO2)
2497 FMADD y07, alpha1, a7, y07
2498 LFD a7, 6 * SIZE(AO2)
2499 FMADD y08, alpha1, a8, y08
2500 LFD a8, 7 * SIZE(AO2)
/* alpha2 * AO2. */
2502 FMADD y01, alpha2, a1, y01
2503 FMADD y02, alpha2, a2, y02
2504 FMADD y03, alpha2, a3, y03
2505 FMADD y04, alpha2, a4, y04
2506 FMADD y05, alpha2, a5, y05
2507 FMADD y06, alpha2, a6, y06
2508 FMADD y07, alpha2, a7, y07
2509 FMADD y08, alpha2, a8, y08
2511 STFD y01, 0 * SIZE(Y1)
2512 STFD y02, 1 * SIZE(Y1)
2513 STFD y03, 2 * SIZE(Y1)
2514 STFD y04, 3 * SIZE(Y1)
2515 STFD y05, 4 * SIZE(Y1)
2516 STFD y06, 5 * SIZE(Y1)
2517 STFD y07, 6 * SIZE(Y1)
2518 STFD y08, 7 * SIZE(Y1)
2520 addi AO1, AO1, 8 * SIZE
2521 addi AO2, AO2, 8 * SIZE
2522 addi Y1, Y1, 8 * SIZE
/* Two-stream path, 4-element step:                                   */
/* Y1[i] += alpha1*AO1[i] + alpha2*AO2[i] for i = 0..3.               */
/* Both streams fit in registers (a1..a4 = AO1, a5..a8 = AO2), so     */
/* every load is issued before the first FMADD.                       */
2529 LFD y01, 0 * SIZE(Y1)
2530 LFD y02, 1 * SIZE(Y1)
2531 LFD y03, 2 * SIZE(Y1)
2532 LFD y04, 3 * SIZE(Y1)
2534 LFD a1, 0 * SIZE(AO1)
2535 LFD a2, 1 * SIZE(AO1)
2536 LFD a3, 2 * SIZE(AO1)
2537 LFD a4, 3 * SIZE(AO1)
2539 LFD a5, 0 * SIZE(AO2)
2540 LFD a6, 1 * SIZE(AO2)
2541 LFD a7, 2 * SIZE(AO2)
2542 LFD a8, 3 * SIZE(AO2)
2544 FMADD y01, alpha1, a1, y01
2545 FMADD y02, alpha1, a2, y02
2546 FMADD y03, alpha1, a3, y03
2547 FMADD y04, alpha1, a4, y04
2549 FMADD y01, alpha2, a5, y01
2550 FMADD y02, alpha2, a6, y02
2551 FMADD y03, alpha2, a7, y03
2552 FMADD y04, alpha2, a8, y04
2554 STFD y01, 0 * SIZE(Y1)
2555 STFD y02, 1 * SIZE(Y1)
2556 STFD y03, 2 * SIZE(Y1)
2557 STFD y04, 3 * SIZE(Y1)
/* Step all three pointers past the four elements just handled. */
2559 addi AO1, AO1, 4 * SIZE
2560 addi AO2, AO2, 4 * SIZE
2561 addi Y1, Y1, 4 * SIZE
/* Two-stream path, 2-element step:                                   */
/* Y1[i] += alpha1*AO1[i] + alpha2*AO2[i] for i = 0..1.               */
/* a1,a2 carry AO1 and a3,a4 carry AO2.                               */
2568 LFD y01, 0 * SIZE(Y1)
2569 LFD y02, 1 * SIZE(Y1)
2571 LFD a1, 0 * SIZE(AO1)
2572 LFD a2, 1 * SIZE(AO1)
2573 LFD a3, 0 * SIZE(AO2)
2574 LFD a4, 1 * SIZE(AO2)
2576 FMADD y01, alpha1, a1, y01
2577 FMADD y02, alpha1, a2, y02
2578 FMADD y01, alpha2, a3, y01
2579 FMADD y02, alpha2, a4, y02
2581 STFD y01, 0 * SIZE(Y1)
2582 STFD y02, 1 * SIZE(Y1)
/* Step all three pointers past the two elements just handled. */
2584 addi AO1, AO1, 2 * SIZE
2585 addi AO2, AO2, 2 * SIZE
2586 addi Y1, Y1, 2 * SIZE
/* Two-stream path, single-element step:                              */
/* Y1[0] += alpha1*AO1[0] + alpha2*AO2[0].                            */
/* No pointer is advanced in the visible lines.                       */
2593 LFD y01, 0 * SIZE(Y1)
2595 LFD a1, 0 * SIZE(AO1)
2596 LFD a2, 0 * SIZE(AO2)
2598 FMADD y01, alpha1, a1, y01
2599 FMADD y01, alpha2, a2, y01
2601 STFD y01, 0 * SIZE(Y1)
/* Single-stream setup: alpha1 <- alpha * (value read through X). */
2610 LFD alpha1, 0 * SIZE(X)
2611 FMUL alpha1, alpha, alpha1
/* Single-stream path, 16-element preload: Y1[0..15] into y01..y16    */
/* and AO1[0..7] into a1..a8. The AO1 loads sit between the two       */
/* halves of the Y1 loads.                                            */
/* (LFD is a macro defined outside this view; read here as a load.)   */
2620 LFD y01, 0 * SIZE(Y1)
2621 LFD y02, 1 * SIZE(Y1)
2622 LFD y03, 2 * SIZE(Y1)
2623 LFD y04, 3 * SIZE(Y1)
2624 LFD y05, 4 * SIZE(Y1)
2625 LFD y06, 5 * SIZE(Y1)
2626 LFD y07, 6 * SIZE(Y1)
2627 LFD y08, 7 * SIZE(Y1)
/* First half (elements 0..7) of the AO1 stream. */
2629 LFD a1, 0 * SIZE(AO1)
2630 LFD a2, 1 * SIZE(AO1)
2631 LFD a3, 2 * SIZE(AO1)
2632 LFD a4, 3 * SIZE(AO1)
2633 LFD a5, 4 * SIZE(AO1)
2634 LFD a6, 5 * SIZE(AO1)
2635 LFD a7, 6 * SIZE(AO1)
2636 LFD a8, 7 * SIZE(AO1)
/* Second half of the Y1 accumulators. */
2638 LFD y09, 8 * SIZE(Y1)
2639 LFD y10, 9 * SIZE(Y1)
2640 LFD y11, 10 * SIZE(Y1)
2641 LFD y12, 11 * SIZE(Y1)
2642 LFD y13, 12 * SIZE(Y1)
2643 LFD y14, 13 * SIZE(Y1)
2644 LFD y15, 14 * SIZE(Y1)
2645 LFD y16, 15 * SIZE(Y1)
/* Single-stream path, software-pipelined 16-element block.           */
/* On entry y01..y16 hold Y1[0..15] and a1..a8 hold AO1[0..7] (they   */
/* are consumed before being written here). Each FMADD accumulates    */
/* alpha1 * aN into yNN while the paired LFD refills aN.              */
/* FMADD/LFD/STFD are macros defined outside this view; read here as  */
/* fused multiply-add / load / store.                                 */
/* NOTE(review): presumably a loop body -- the branch and label are   */
/* not visible in this listing; confirm against the full file.        */

/* alpha1 * AO1[0..7]; refill with AO1[8..15]. */
2651 FMADD y01, alpha1, a1, y01
2652 LFD a1, 8 * SIZE(AO1)
2653 FMADD y02, alpha1, a2, y02
2654 LFD a2, 9 * SIZE(AO1)
2655 FMADD y03, alpha1, a3, y03
2656 LFD a3, 10 * SIZE(AO1)
2657 FMADD y04, alpha1, a4, y04
2658 LFD a4, 11 * SIZE(AO1)
2660 FMADD y05, alpha1, a5, y05
2661 LFD a5, 12 * SIZE(AO1)
2662 FMADD y06, alpha1, a6, y06
2663 LFD a6, 13 * SIZE(AO1)
2664 FMADD y07, alpha1, a7, y07
2665 LFD a7, 14 * SIZE(AO1)
2666 FMADD y08, alpha1, a8, y08
2667 LFD a8, 15 * SIZE(AO1)
/* alpha1 * AO1[8..15]; AO1 is not yet advanced, so offsets 16..23    */
/* fetch elements 0..7 of the following 16-element group.             */
2669 FMADD y09, alpha1, a1, y09
2670 LFD a1, 16 * SIZE(AO1)
2671 FMADD y10, alpha1, a2, y10
2672 LFD a2, 17 * SIZE(AO1)
2673 FMADD y11, alpha1, a3, y11
2674 LFD a3, 18 * SIZE(AO1)
2675 FMADD y12, alpha1, a4, y12
2676 LFD a4, 19 * SIZE(AO1)
2678 FMADD y13, alpha1, a5, y13
2679 LFD a5, 20 * SIZE(AO1)
2680 FMADD y14, alpha1, a6, y14
2681 LFD a6, 21 * SIZE(AO1)
2682 FMADD y15, alpha1, a7, y15
2683 LFD a7, 22 * SIZE(AO1)
2684 FMADD y16, alpha1, a8, y16
2685 LFD a8, 23 * SIZE(AO1)
/* Store each result and immediately reload the same register with    */
/* the Y1 value 16 elements further on.                               */
2687 STFD y01, 0 * SIZE(Y1)
2688 LFD y01, 16 * SIZE(Y1)
2689 STFD y02, 1 * SIZE(Y1)
2690 LFD y02, 17 * SIZE(Y1)
2692 STFD y03, 2 * SIZE(Y1)
2693 LFD y03, 18 * SIZE(Y1)
2694 STFD y04, 3 * SIZE(Y1)
2695 LFD y04, 19 * SIZE(Y1)
2697 STFD y05, 4 * SIZE(Y1)
2698 LFD y05, 20 * SIZE(Y1)
2699 STFD y06, 5 * SIZE(Y1)
2700 LFD y06, 21 * SIZE(Y1)
2702 STFD y07, 6 * SIZE(Y1)
2703 LFD y07, 22 * SIZE(Y1)
2704 STFD y08, 7 * SIZE(Y1)
2705 LFD y08, 23 * SIZE(Y1)
2707 STFD y09, 8 * SIZE(Y1)
2708 LFD y09, 24 * SIZE(Y1)
2709 STFD y10, 9 * SIZE(Y1)
2710 LFD y10, 25 * SIZE(Y1)
2712 STFD y11, 10 * SIZE(Y1)
2713 LFD y11, 26 * SIZE(Y1)
2714 STFD y12, 11 * SIZE(Y1)
2715 LFD y12, 27 * SIZE(Y1)
2717 STFD y13, 12 * SIZE(Y1)
2718 LFD y13, 28 * SIZE(Y1)
2719 STFD y14, 13 * SIZE(Y1)
2720 LFD y14, 29 * SIZE(Y1)
2722 STFD y15, 14 * SIZE(Y1)
2723 LFD y15, 30 * SIZE(Y1)
2724 STFD y16, 15 * SIZE(Y1)
2725 LFD y16, 31 * SIZE(Y1)
/* Step both pointers to the group whose data is now in flight. */
2727 addi AO1, AO1, 16 * SIZE
2728 addi Y1, Y1, 16 * SIZE
/* Single-stream path, 16-element block without look-ahead.           */
/* On entry y01..y16 already hold Y1[0..15] and a1..a8 hold           */
/* AO1[0..7] (both are read before being written here). No offset     */
/* beyond 15 is touched on Y1 or AO1.                                 */
/* FMADD/LFD/STFD are macros defined outside this view; read here as  */
/* fused multiply-add / load / store.                                 */

/* alpha1 * AO1[0..7]; refill with AO1[8..15]. */
2737 FMADD y01, alpha1, a1, y01
2738 LFD a1, 8 * SIZE(AO1)
2739 FMADD y02, alpha1, a2, y02
2740 LFD a2, 9 * SIZE(AO1)
2741 FMADD y03, alpha1, a3, y03
2742 LFD a3, 10 * SIZE(AO1)
2743 FMADD y04, alpha1, a4, y04
2744 LFD a4, 11 * SIZE(AO1)
2746 FMADD y05, alpha1, a5, y05
2747 LFD a5, 12 * SIZE(AO1)
2748 FMADD y06, alpha1, a6, y06
2749 LFD a6, 13 * SIZE(AO1)
2750 FMADD y07, alpha1, a7, y07
2751 LFD a7, 14 * SIZE(AO1)
2752 FMADD y08, alpha1, a8, y08
2753 LFD a8, 15 * SIZE(AO1)
/* alpha1 * AO1[8..15]. */
2755 FMADD y09, alpha1, a1, y09
2756 FMADD y10, alpha1, a2, y10
2757 FMADD y11, alpha1, a3, y11
2758 FMADD y12, alpha1, a4, y12
2759 FMADD y13, alpha1, a5, y13
2760 FMADD y14, alpha1, a6, y14
2761 FMADD y15, alpha1, a7, y15
2762 FMADD y16, alpha1, a8, y16
/* Write back all 16 results, then step the pointers past them. */
2764 STFD y01, 0 * SIZE(Y1)
2765 STFD y02, 1 * SIZE(Y1)
2766 STFD y03, 2 * SIZE(Y1)
2767 STFD y04, 3 * SIZE(Y1)
2768 STFD y05, 4 * SIZE(Y1)
2769 STFD y06, 5 * SIZE(Y1)
2770 STFD y07, 6 * SIZE(Y1)
2771 STFD y08, 7 * SIZE(Y1)
2773 STFD y09, 8 * SIZE(Y1)
2774 STFD y10, 9 * SIZE(Y1)
2775 STFD y11, 10 * SIZE(Y1)
2776 STFD y12, 11 * SIZE(Y1)
2777 STFD y13, 12 * SIZE(Y1)
2778 STFD y14, 13 * SIZE(Y1)
2779 STFD y15, 14 * SIZE(Y1)
2780 STFD y16, 15 * SIZE(Y1)
2782 addi AO1, AO1, 16 * SIZE
2783 addi Y1, Y1, 16 * SIZE
/* Single-stream path, 8-element step:                                */
/* Y1[i] += alpha1 * AO1[i] for i = 0..7.                             */
/* Loads are issued in groups of four, alternating between Y1 and     */
/* AO1, before any FMADD.                                             */
2793 LFD y01, 0 * SIZE(Y1)
2794 LFD y02, 1 * SIZE(Y1)
2795 LFD y03, 2 * SIZE(Y1)
2796 LFD y04, 3 * SIZE(Y1)
2798 LFD a1, 0 * SIZE(AO1)
2799 LFD a2, 1 * SIZE(AO1)
2800 LFD a3, 2 * SIZE(AO1)
2801 LFD a4, 3 * SIZE(AO1)
2803 LFD y05, 4 * SIZE(Y1)
2804 LFD y06, 5 * SIZE(Y1)
2805 LFD y07, 6 * SIZE(Y1)
2806 LFD y08, 7 * SIZE(Y1)
2808 LFD a5, 4 * SIZE(AO1)
2809 LFD a6, 5 * SIZE(AO1)
2810 LFD a7, 6 * SIZE(AO1)
2811 LFD a8, 7 * SIZE(AO1)
2813 FMADD y01, alpha1, a1, y01
2814 FMADD y02, alpha1, a2, y02
2815 FMADD y03, alpha1, a3, y03
2816 FMADD y04, alpha1, a4, y04
2818 FMADD y05, alpha1, a5, y05
2819 FMADD y06, alpha1, a6, y06
2820 FMADD y07, alpha1, a7, y07
2821 FMADD y08, alpha1, a8, y08
2823 STFD y01, 0 * SIZE(Y1)
2824 STFD y02, 1 * SIZE(Y1)
2825 STFD y03, 2 * SIZE(Y1)
2826 STFD y04, 3 * SIZE(Y1)
2828 STFD y05, 4 * SIZE(Y1)
2829 STFD y06, 5 * SIZE(Y1)
2830 STFD y07, 6 * SIZE(Y1)
2831 STFD y08, 7 * SIZE(Y1)
/* Step both pointers past the eight elements just handled. */
2833 addi AO1, AO1, 8 * SIZE
2834 addi Y1, Y1, 8 * SIZE
/* Single-stream path, 4-element step:                                */
/* Y1[i] += alpha1 * AO1[i] for i = 0..3, then both pointers move     */
/* on by four elements.                                               */
2841 LFD y01, 0 * SIZE(Y1)
2842 LFD y02, 1 * SIZE(Y1)
2843 LFD y03, 2 * SIZE(Y1)
2844 LFD y04, 3 * SIZE(Y1)
2846 LFD a1, 0 * SIZE(AO1)
2847 LFD a2, 1 * SIZE(AO1)
2848 LFD a3, 2 * SIZE(AO1)
2849 LFD a4, 3 * SIZE(AO1)
2851 FMADD y01, alpha1, a1, y01
2852 FMADD y02, alpha1, a2, y02
2853 FMADD y03, alpha1, a3, y03
2854 FMADD y04, alpha1, a4, y04
2856 STFD y01, 0 * SIZE(Y1)
2857 STFD y02, 1 * SIZE(Y1)
2858 STFD y03, 2 * SIZE(Y1)
2859 STFD y04, 3 * SIZE(Y1)
2861 addi AO1, AO1, 4 * SIZE
2862 addi Y1, Y1, 4 * SIZE
/* Single-stream path, 2-element step:                                */
/* Y1[i] += alpha1 * AO1[i] for i = 0..1, then both pointers move     */
/* on by two elements.                                                */
2869 LFD y01, 0 * SIZE(Y1)
2870 LFD y02, 1 * SIZE(Y1)
2872 LFD a1, 0 * SIZE(AO1)
2873 LFD a2, 1 * SIZE(AO1)
2875 FMADD y01, alpha1, a1, y01
2876 FMADD y02, alpha1, a2, y02
2878 STFD y01, 0 * SIZE(Y1)
2879 STFD y02, 1 * SIZE(Y1)
2881 addi AO1, AO1, 2 * SIZE
2882 addi Y1, Y1, 2 * SIZE
/* Single-stream path, single-element step:                           */
/* Y1[0] += alpha1 * AO1[0]. No pointer is advanced in the visible    */
/* lines.                                                             */
2889 LFD y01, 0 * SIZE(Y1)
2890 LFD a1, 0 * SIZE(AO1)
2892 FMADD y01, alpha1, a1, y01
2893 STFD y01, 0 * SIZE(Y1)
/* Transfer of results from the contiguous YY area out through Y1,    */
/* in groups of 8, 4, 2 and 1 elements.                               */
/* NOTE(review): the inner listing numbers jump between every group   */
/* and between consecutive stores, so the branches, the Y1 pointer    */
/* updates and any arithmetic applied to f8..f15 before the stores    */
/* are not visible here -- confirm against the full file.             */

/* Compare INCY with SIZE into cr0; the branch that consumes cr0 is   */
/* not visible (presumably separates unit-stride Y from strided Y).   */
2897 cmpi cr0, 0, INCY, SIZE
/* Group of 8: read YY[0..7] into f8..f15 and advance YY. */
2926 LFD f8, 0 * SIZE(YY)
2927 LFD f9, 1 * SIZE(YY)
2928 LFD f10, 2 * SIZE(YY)
2929 LFD f11, 3 * SIZE(YY)
2930 LFD f12, 4 * SIZE(YY)
2931 LFD f13, 5 * SIZE(YY)
2932 LFD f14, 6 * SIZE(YY)
2933 LFD f15, 7 * SIZE(YY)
2934 addi YY, YY, 8 * SIZE
/* Every store targets 0 * SIZE(Y1); Y1 is presumably stepped on the  */
/* hidden lines in between.                                           */
2945 STFD f8, 0 * SIZE(Y1)
2947 STFD f9, 0 * SIZE(Y1)
2949 STFD f10, 0 * SIZE(Y1)
2951 STFD f11, 0 * SIZE(Y1)
2953 STFD f12, 0 * SIZE(Y1)
2955 STFD f13, 0 * SIZE(Y1)
2957 STFD f14, 0 * SIZE(Y1)
2959 STFD f15, 0 * SIZE(Y1)
/* Group of 4. */
2977 LFD f8, 0 * SIZE(YY)
2978 LFD f9, 1 * SIZE(YY)
2979 LFD f10, 2 * SIZE(YY)
2980 LFD f11, 3 * SIZE(YY)
2981 addi YY, YY, 4 * SIZE
2988 STFD f8, 0 * SIZE(Y1)
2990 STFD f9, 0 * SIZE(Y1)
2992 STFD f10, 0 * SIZE(Y1)
2994 STFD f11, 0 * SIZE(Y1)
/* Group of 2. */
3007 LFD f8, 0 * SIZE(YY)
3008 LFD f9, 1 * SIZE(YY)
3009 addi YY, YY, 2 * SIZE
3014 STFD f8, 0 * SIZE(Y1)
3016 STFD f9, 0 * SIZE(Y1)
/* Single element; YY is not advanced afterwards in the visible lines. */
3025 LFD f8, 0 * SIZE(YY)
3029 STFD f8, 0 * SIZE(Y1)
3086 addi SP, SP, STACKSIZE