1 dnl PowerPC-64 mpn_mul_basecase.
3 dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008 Free Software
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
dnl Register aliases for the outer-loop pointers.  In the code below, rp and
dnl up are re-derived from these by adding a fixed offset (for example
dnl "addi rp, outer_rp, 16" and "addi up, outer_up, 16"), and outer_rp is
dnl incremented by 8 next to each load of a new v limb.
37 define(`outer_rp', `r22')
38 define(`outer_up', `r23')
41 PROLOGUE(mpn_mul_basecase)
43 C Special code for un <= 2, for efficiency of these important cases,
44 C and since it simplifies the default code.
50 mulld r8, r5, r7 C weight 0
51 mulhdu r9, r5, r7 C weight 1
58 mulld r8, r0, r7 C weight 1
59 mulhdu r10, r0, r7 C weight 2
69 mulld r8, r5, r6 C weight 1
70 mulhdu r11, r5, r6 C weight 2
74 mulld r12, r0, r6 C weight 2
75 mulhdu r0, r0, r6 C weight 3
98 ld v0, 0(vp) C new v limb
102 rldicl. r0, un, 0,62 C r0 = un & 3, set cr0
104 addi un, un, 1 C compute count...
105 srdi un, un, 2 C ...for ctr
106 mtctr un C copy inner loop count into ctr
113 L(b3): mulld r0, r26, v0
170 mtctr un C copy inner loop count into ctr
173 addi outer_rp, outer_rp, 8
174 ld v0, 0(vp) C new v limb
186 ALIGN(16) C registers dying
189 mulhdu r10, r26, v0 C 26
193 mulhdu r8, r27, v0 C 27
196 adde r0, r0, r12 C 0 12
197 adde r24, r24, r10 C 24 10
199 mulhdu r10, r26, v0 C 26
203 mulhdu r12, r27, v0 C 27
206 adde r9, r9, r8 C 8 9
207 adde r11, r11, r10 C 10 11
209 addc r0, r0, r28 C 0 28
211 adde r24, r24, r29 C 24 29
213 adde r9, r9, r30 C 9 30
215 adde r11, r11, r31 C 11 31
312 mtctr un C copy inner loop count into ctr
313 addi rp, outer_rp, 16
315 addi outer_rp, outer_rp, 8
316 ld v0, 0(vp) C new v limb
338 ALIGN(16) C registers dying
341 mulhdu r10, r26, v0 C 26
345 mulhdu r8, r27, v0 C 27
348 adde r0, r0, r12 C 0 12
349 adde r24, r24, r10 C 24 10
351 mulhdu r10, r26, v0 C 26
355 mulhdu r12, r27, v0 C 27
358 adde r9, r9, r8 C 8 9
359 adde r11, r11, r10 C 10 11
361 addc r0, r0, r28 C 0 28
363 adde r24, r24, r29 C 24 29
365 adde r9, r9, r30 C 9 30
367 adde r11, r11, r31 C 11 31
469 mtctr un C copy inner loop count into ctr
470 addi rp, outer_rp, 24
471 addi up, outer_up, 16
472 addi outer_rp, outer_rp, 8
473 ld v0, 0(vp) C new v limb
500 ALIGN(16) C registers dying
503 mulhdu r10, r26, v0 C 26
507 mulhdu r8, r27, v0 C 27
510 adde r0, r0, r12 C 0 12
511 adde r24, r24, r10 C 24 10
513 mulhdu r10, r26, v0 C 26
517 mulhdu r12, r27, v0 C 27
520 adde r9, r9, r8 C 8 9
521 adde r11, r11, r10 C 10 11
523 addc r0, r0, r28 C 0 28
525 adde r24, r24, r29 C 24 29
527 adde r9, r9, r30 C 9 30
529 adde r11, r11, r31 C 11 31
617 mtctr un C copy inner loop count into ctr
619 addi up, outer_up, -8
620 addi outer_rp, outer_rp, 8
621 ld v0, 0(vp) C new v limb
628 ALIGN(16) C registers dying
631 mulhdu r10, r26, v0 C 26
635 mulhdu r8, r27, v0 C 27
638 adde r0, r0, r12 C 0 12
639 adde r24, r24, r10 C 24 10
641 mulhdu r10, r26, v0 C 26
645 mulhdu r12, r27, v0 C 27
648 adde r9, r9, r8 C 8 9
649 adde r11, r11, r10 C 10 11
651 addc r0, r0, r28 C 0 28
653 adde r24, r24, r29 C 24 29
655 adde r9, r9, r30 C 9 30
657 adde r11, r11, r31 C 11 31
687 L(ret): ld r31, -8(r1)