1 dnl PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
3 dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007 Free Software
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
27 C n POWER3/PPC630 POWER4/PPC970
55 C This code is a little bit slower for POWER3/PPC630 than the simple code used
56 C previously, but it is much faster for POWER4/PPC970. The reason for the
57 C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4
C Macro set selected when OPERATION_add_n is defined.
C   func, func_nc  expand to mpn_add_n and mpn_add_nc
C   GENRVAL        adds 1 to r3
C   SETCBR         addic of -1 to its argument with the result in r0; by the
C                  PowerPC addic definition CA ends up set exactly when the
C                  argument is nonzero
C   CLRCB          addic of 0 to r0, which cannot carry, so CA ends up clear
C NOTE(review): no ADDSUBC definition is visible in this branch although the
C OPERATION_sub_n branch has one; the embedded numbering jumps from 66 to 69,
C so it is presumably on a line not shown here -- confirm.
66 ifdef(`OPERATION_add_n',`
69 define(func, mpn_add_n)
70 define(func_nc, mpn_add_nc)
71 define(GENRVAL, `addi r3, r3, 1')
72 define(SETCBR, `addic r0, $1, -1')
73 define(CLRCB, `addic r0, r0, 0')
C Macro set selected when OPERATION_sub_n is defined.
C   ADDSUBC        subfe, the subtract-from-extended form that both reads and
C                  writes CA
C   func, func_nc  expand to mpn_sub_n and mpn_sub_nc
C   GENRVAL        negates r3
C   SETCBR         subfic computing 0 minus its argument into r0; CA ends up
C                  set exactly when the argument is zero, which is the
C                  no-borrow state for subfe
C   CLRCB          addic of -1 to r1 with the result in r0; CA ends up set
C                  whenever r1 is nonzero (presumably always, r1 looking like
C                  the stack pointer -- confirm), again the no-borrow state
75 ifdef(`OPERATION_sub_n',`
76 define(ADDSUBC, subfe)
78 define(func, mpn_sub_n)
79 define(func_nc, mpn_sub_nc)
80 define(GENRVAL, `neg r3, r3')
81 define(SETCBR, `subfic r0, $1, 0')
82 define(CLRCB, `addic r0, r1, -1')
C NOTE(review): the line below names the four entry points mpn_add_n,
C mpn_add_nc, mpn_sub_n and mpn_sub_nc; presumably it tells the build which
C functions this one source can produce -- confirm against the definition of
C the macro, which is not in this file.
85 MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
C Main body shared by the add and subtract builds.
C NOTE(review): the embedded numbering jumps between many of the lines below,
C so instructions such as the dispatch branches, result stores and pointer
C updates are presumably on lines not shown; the notes here cover only what is
C visible.
C Registers as used below: r4 and r5 are the bases the s1 and s2 limbs are
C loaded from, r6 holds n on arrival and is then turned into the loop count,
C r1 addresses the save slot used for r31.
95 L(ent): std r31, -8(r1) C save r31 at -8 from r1, reloaded at L(ret)
100 rldicl. r0, r6, 0,62 C r0 = n & 3, set cr0
C ctr = (n + 3) >> 2, i.e. n/4 rounded up.
102 addi r6, r6, 3 C compute count...
103 srdi r6, r6, 2 C ...for ctr
104 mtctr r6 C copy count into ctr
C Three limb pairs are loaded here, at byte offsets 0, 8 and 16.
C NOTE(review): presumably the entry for n mod 4 = 3 -- confirm against the
C dispatch branches.
109 L(b11): ld r8, 0(r4) C load s1 limb
110 ld r9, 0(r5) C load s2 limb
111 ld r10, 8(r4) C load s1 limb
112 ld r11, 8(r5) C load s2 limb
113 ld r12, 16(r4) C load s1 limb
115 ld r0, 16(r5) C load s2 limb
118 ADDSUBC r30, r11, r10 C combine the pair held in r10 and r11
C One limb pair is loaded here.
C NOTE(review): presumably the entry for n mod 4 = 1 -- confirm.
127 L(b01): ld r12, 0(r4) C load s1 limb
129 ld r0, 0(r5) C load s2 limb
131 ADDSUBC r31, r0, r12 C add
C Two limb pairs are loaded here, at byte offsets 0 and 8.
C NOTE(review): presumably the entry for n mod 4 = 2 -- confirm.
137 L(b10): ld r10, 0(r4) C load s1 limb
138 ld r11, 0(r5) C load s2 limb
139 ld r12, 8(r4) C load s1 limb
141 ld r0, 8(r5) C load s2 limb
143 ADDSUBC r30, r11, r10 C add
144 ADDSUBC r31, r0, r12 C add
C Four limb pairs are loaded here, at byte offsets 0, 8, 16 and 24, before the
C loop is entered.
151 L(b00): C INITCY C clear/set cy
152 L(go): ld r6, 0(r4) C load s1 limb
153 ld r7, 0(r5) C load s2 limb
154 ld r8, 8(r4) C load s1 limb
155 ld r9, 8(r5) C load s2 limb
156 ld r10, 16(r4) C load s1 limb
157 ld r11, 16(r5) C load s2 limb
158 ld r12, 24(r4) C load s1 limb
159 ld r0, 24(r5) C load s2 limb
C Loop: each visible ADDSUBC consumes a pair loaded earlier, and the register
C pair it read is then refilled for the next pass.  The carry bit links the
C ADDSUBC steps, so their relative order matters.
165 L(oop): ADDSUBC r28, r7, r6 C combine the pair held in r6 and r7
166 ld r6, 0(r4) C load s1 limb
167 ld r7, 0(r5) C load s2 limb
169 ld r8, 8(r4) C load s1 limb
170 ld r9, 8(r5) C load s2 limb
171 ADDSUBC r30, r11, r10 C combine the pair held in r10 and r11
172 ld r10, 16(r4) C load s1 limb
173 ld r11, 16(r5) C load s2 limb
175 ld r12, 24(r4) C load s1 limb
176 ld r0, 24(r5) C load s2 limb
184 bdnz L(oop) C decrement ctr and loop back
C Wind-down: the pairs still held in registers are combined without loading
C any further limbs.
186 L(end): ADDSUBC r28, r7, r6 C combine the pair held in r6 and r7
188 ADDSUBC r30, r11, r10 C combine the pair held in r10 and r11
195 L(ret): ld r31, -8(r1) C reload the r31 value saved at L(ent)
C subfe with identical source operands leaves CA - 1 in r3: 0 when CA is set
C and -1 when it is clear.
C NOTE(review): presumably GENRVAL then maps this to the 0 or 1 result, but
C its use is not visible here -- confirm.
200 subfe r3, r0, r0 C -cy