1 dnl IA-64 mpn_add_n_sub_n -- mpn parallel addition and subtraction.
3 dnl Contributed to the GNU project by Torbjorn Granlund.
5 dnl Copyright 2010 Free Software Foundation, Inc.
7 dnl This file is part of the GNU MP Library.
9 dnl The GNU MP Library is free software; you can redistribute it and/or modify
10 dnl it under the terms of the GNU Lesser General Public License as published
11 dnl by the Free Software Foundation; either version 3 of the License, or (at
12 dnl your option) any later version.
14 dnl The GNU MP Library is distributed in the hope that it will be useful, but
15 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17 dnl License for more details.
19 dnl You should have received a copy of the GNU Lesser General Public License
20 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
22 include(`../config.m4')
35 C Some useful aliases for registers we use
C   u0-u3 = r16-r19   limbs loaded through up0/up1
C   v0-v3 = r20-r23   limbs loaded through vp0/vp1
C   s0-s3 = r24-r27   sums u+v, stored through sp
C   d0-d3 = r28-r31   differences u-v, stored through dp
C The digit is the limb slot within a group of four.
36 define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
37 define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
38 define(`s0',`r24') define(`s1',`r25') define(`s2',`r26') define(`s3',`r27')
39 define(`d0',`r28') define(`d1',`r29') define(`d2',`r30') define(`d3',`r31')
C Short names for the two compare forms used in carry and borrow handling:
C   cmpltu   unsigned less-than compare
C   cmpeqor  equality compare whose result is ORed into the target predicate
45 define(`cmpltu', `cmp.ltu')
46 define(`cmpeqor', `cmp.eq.or')
C mpn_add_n_sub_n: for each pair of limbs (u, v) read through up and vp,
C store u+v through sp and u-v through dp, rippling a carry and a borrow from
C limb to limb.  The final carry and borrow are folded into r8 just before
C the return.
C
C NOTE(review): sp, dp, up, vp, n, up0, up1, vp0 and vp1 are m4 names whose
C definitions are not in this excerpt; their roles are read off the loads and
C stores below.  The labels L(top) and L(mid) targeted by br.cloop, and the
C setup of r2, r8, r9 and ar.lc, are likewise not visible here, so the
C comments describe only the instructions that are shown.
C
C Predicate roles in the limb code:
C   p6, p7, p8, p9      carry out of add0, add1, add2, add3
C   p10, p11, p12, p13  borrow out of sub0, sub1, sub2, sub3
C p10-p12 are first written by the tests on r9 at entry; p14 and p15 hold
C size tests against n.
C
C Carry and borrow detection: an unsigned add carried out iff the sum is
C below an addend (s < v); an unsigned subtract borrowed iff u < v.
C
C NOTE(review): the trailing M I, M01, M23, I0 and B tags presumably name the
C execution units each instruction can issue on -- confirm.
49 PROLOGUE(mpn_add_n_sub_n)
C Form 64-bit addresses from the low 32 bits of the four pointer arguments.
C NOTE(review): presumably wanted only under a 32-bit pointer ABI; any guard
C around these lines is not visible here -- confirm.
54 addp4 sp = 0, sp C M I
55 addp4 dp = 0, dp C M I
56 addp4 up = 0, up C M I
57 addp4 vp = 0, vp C M I
C up1 and vp1 run one limb (8 bytes) ahead of up0 and vp0, so the paired
C loads that post-increment by 16 fetch alternate limbs.
64 add up1 = 8, up0 C M I
65 add vp1 = 8, vp0 C M I
C r10 (here) and r11 (below) start 256 bytes past up and vp; they are the
C addresses given to lfetch in the loop.
67 add r10 = 256, up C M I
C Test r9 against 0, 2 and 3.
C NOTE(review): r9 is presumably n mod 4 and p10/p11/p12 presumably select
C L(b0), L(b2), L(b3) with L(b1) as the fall-through; the instructions that
C compute r9 and branch on these predicates are not in this excerpt.
70 cmp.eq p10, p0 = 0, r9 C M I
71 cmp.eq p11, p0 = 2, r9 C M I
72 cmp.eq p12, p0 = 3, r9 C M I
73 add r11 = 256, vp C M I
C L(b1): take one limb from each operand into the u3/v3 slot, advancing all
C four source pointers by one limb.
80 L(b1): ld8 u3 = [up0], 8 C M01
81 add up1 = 8, up1 C M I
C p14 = (4 < n) unsigned, p15 = the complement.
82 cmpltu p14, p15 = 4, n C M I
83 ld8 v3 = [vp0], 8 C M01
84 add vp1 = 8, vp1 C M I
C NOTE(review): the instructions that produce s3 and d3 are not visible here.
90 cmpltu p9, p0 = s3, v3 C carry from add3 M I
91 cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
93 st8 [sp] = s3, 8 C M23
94 st8 [dp] = d3, 8 C M23
C L(b0): clear p9 and p13 (r0 != r0 is never true), i.e. no carry and no
C borrow into limb 0.
97 L(b0): cmp.ne p9, p0 = r0, r0 C M I
98 cmp.ne p13, p0 = r0, r0 C M I
C L(c0): load four limbs from each operand, then form the raw sums,
C differences and carry/borrow flags for limbs 0 and 1.
99 L(c0): ld8 u0 = [up0], 16 C M01
100 ld8 u1 = [up1], 16 C M01
102 ld8 v0 = [vp0], 16 C M01
103 ld8 v1 = [vp1], 16 C M01
105 ld8 u2 = [up0], 16 C M01
106 ld8 u3 = [up1], 16 C M01
108 ld8 v2 = [vp0], 16 C M01
109 ld8 v3 = [vp1], 16 C M01
111 add s0 = u0, v0 C M I
112 add s1 = u1, v1 C M I
113 sub d0 = u0, v0 C M I
114 sub d1 = u1, v1 C M I
116 cmpltu p6, p0 = s0, v0 C carry from add0 M I
117 cmpltu p7, p0 = s1, v1 C carry from add1 M I
118 cmpltu p10, p0 = u0, v0 C borrow from sub0 M I
119 cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
C Counted-loop branch on ar.lc into the first loop half.
122 br.cloop.dptk L(top) C B
C L(b3): take one limb from each operand into the u1/v1 slot, store its sum
C and difference, and leave its carry in p7 and its borrow in p11.
125 L(b3): ld8 u1 = [up0], 8 C M01
126 add up1 = 8, up1 C M I
127 ld8 v1 = [vp0], 8 C M01
129 add vp1 = 8, vp1 C M I
130 add s1 = u1, v1 C M I
131 sub d1 = u1, v1 C M I
133 cmpltu p7, p0 = s1, v1 C carry from add1 M I
134 cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
136 st8 [sp] = s1, 8 C M23
137 st8 [dp] = d1, 8 C M23
C L(b2): clear p7 and p11, i.e. no carry and no borrow into limb 2.
141 L(b2): cmp.ne p7, p0 = r0, r0 C M I
142 cmp.ne p11, p0 = r0, r0 C M I
C L(c2): load limbs 2 and 3 and test whether n exceeds 4.
C NOTE(review): the branch that consumes p14 is not in this excerpt.
144 L(c2): ld8 u2 = [up0], 16 C M01
145 ld8 u3 = [up1], 16 C M01
146 cmpltu p14, p0 = 4, n C M I
148 ld8 v2 = [vp0], 16 C M01
149 ld8 v3 = [vp1], 16 C M01
152 add s2 = u2, v2 C M I
153 add s3 = u3, v3 C M I
154 sub d2 = u2, v2 C M I
155 sub d3 = u3, v3 C M I
157 cmpltu p8, p0 = s2, v2 C carry from add2 M I
158 cmpltu p9, p0 = s3, v3 C carry from add3 M I
159 cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
160 cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
C L(gt4): load the following limbs 0 and 1 while forming the raw sums,
C differences and flags for the limbs 2 and 3 already loaded, then enter the
C loop at its second half.
163 L(gt4): ld8 u0 = [up0], 16 C M01
164 ld8 u1 = [up1], 16 C M01
166 ld8 v0 = [vp0], 16 C M01
167 ld8 v1 = [vp1], 16 C M01
169 add s2 = u2, v2 C M I
170 add s3 = u3, v3 C M I
171 sub d2 = u2, v2 C M I
172 sub d3 = u3, v3 C M I
174 cmpltu p8, p0 = s2, v2 C carry from add2 M I
175 cmpltu p9, p0 = s3, v3 C carry from add3 M I
176 cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
177 cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
178 br.cloop.dptk L(mid) C B
C First loop half (presumably the L(top) target; the label is not in this
C excerpt).  Loads the next limbs 0 and 1, finishes and stores the current
C limbs 0 and 1, and forms raw results and flags for limbs 2 and 3.
C
C Ripple step: with carry-in set, a sum equal to -1 (all ones) wraps to 0
C when 1 is added, so a carry-out is ORed into the next carry predicate
C before the increment.  With borrow-in set, a difference equal to 0 wraps
C when 1 is subtracted, so a borrow-out is ORed into the next borrow
C predicate before the decrement.
182 ld8 u0 = [up0], 16 C M01
183 ld8 u1 = [up1], 16 C M01
C Carry p9 and borrow p13 from limb 3 enter limb 0.
184 (p9) cmpeqor p6, p0 = -1, s0 C M I
185 (p9) add s0 = 1, s0 C M I
186 (p13) cmpeqor p10, p0 = 0, d0 C M I
187 (p13) add d0 = -1, d0 C M I
189 ld8 v0 = [vp0], 16 C M01
190 ld8 v1 = [vp1], 16 C M01
C Carry p6 and borrow p10 from limb 0 enter limb 1.
191 (p6) cmpeqor p7, p0 = -1, s1 C M I
192 (p6) add s1 = 1, s1 C M I
193 (p10) cmpeqor p11, p0 = 0, d1 C M I
194 (p10) add d1 = -1, d1 C M I
196 st8 [sp] = s0, 8 C M23
197 st8 [dp] = d0, 8 C M23
198 add s2 = u2, v2 C M I
199 add s3 = u3, v3 C M I
200 sub d2 = u2, v2 C M I
201 sub d3 = u3, v3 C M I
203 st8 [sp] = s1, 8 C M23
204 st8 [dp] = d1, 8 C M23
205 cmpltu p8, p0 = s2, v2 C carry from add2 M I
206 cmpltu p9, p0 = s3, v3 C carry from add3 M I
207 cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
208 cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
C Second loop half (presumably the L(mid) target; the label is not in this
C excerpt).  Loads the next limbs 2 and 3, finishes and stores the current
C limbs 2 and 3, and forms raw results and flags for the next limbs 0 and 1.
211 ld8 u2 = [up0], 16 C M01
212 ld8 u3 = [up1], 16 C M01
C Carry p7 and borrow p11 from limb 1 enter limb 2.
213 (p7) cmpeqor p8, p0 = -1, s2 C M I
214 (p7) add s2 = 1, s2 C M I
215 (p11) cmpeqor p12, p0 = 0, d2 C M I
216 (p11) add d2 = -1, d2 C M I
218 ld8 v2 = [vp0], 16 C M01
219 ld8 v3 = [vp1], 16 C M01
C Carry p8 and borrow p12 from limb 2 enter limb 3.
220 (p8) cmpeqor p9, p0 = -1, s3 C M I
221 (p8) add s3 = 1, s3 C M I
222 (p12) cmpeqor p13, p0 = 0, d3 C M I
223 (p12) add d3 = -1, d3 C M I
225 st8 [sp] = s2, 8 C M23
226 st8 [dp] = d2, 8 C M23
227 add s0 = u0, v0 C M I
228 add s1 = u1, v1 C M I
229 sub d0 = u0, v0 C M I
230 sub d1 = u1, v1 C M I
232 st8 [sp] = s3, 8 C M23
233 st8 [dp] = d3, 8 C M23
234 cmpltu p6, p0 = s0, v0 C carry from add0 M I
235 cmpltu p7, p0 = s1, v1 C carry from add1 M I
236 cmpltu p10, p0 = u0, v0 C borrow from sub0 M I
237 cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
C Prefetch through r10 and r11, advancing each by 32 bytes, which matches the
C four 8-byte limbs consumed from each operand per loop pass.
239 lfetch [r10], 32 C M?
240 lfetch [r11], 32 C M?
241 br.cloop.dptk L(top) C B
C Wind-down after the loop: the same ripple, store and compute sequence as
C one loop pass but without further loads.
C NOTE(review): the labels that the earlier paths use to enter this tail are
C not in this excerpt.
247 (p9) cmpeqor p6, p0 = -1, s0 C M I
248 (p9) add s0 = 1, s0 C M I
249 (p13) cmpeqor p10, p0 = 0, d0 C M I
250 (p13) add d0 = -1, d0 C M I
254 (p6) cmpeqor p7, p0 = -1, s1 C M I
255 (p6) add s1 = 1, s1 C M I
256 (p10) cmpeqor p11, p0 = 0, d1 C M I
257 (p10) add d1 = -1, d1 C M I
259 st8 [sp] = s0, 8 C M23
260 st8 [dp] = d0, 8 C M23
261 add s2 = u2, v2 C M I
262 add s3 = u3, v3 C M I
263 sub d2 = u2, v2 C M I
264 sub d3 = u3, v3 C M I
266 st8 [sp] = s1, 8 C M23
267 st8 [dp] = d1, 8 C M23
268 cmpltu p8, p0 = s2, v2 C carry from add2 M I
269 cmpltu p9, p0 = s3, v3 C carry from add3 M I
270 cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
271 cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
274 (p7) cmpeqor p8, p0 = -1, s2 C M I
275 (p7) add s2 = 1, s2 C M I
276 (p11) cmpeqor p12, p0 = 0, d2 C M I
277 (p11) add d2 = -1, d2 C M I
281 st8 [sp] = s2, 8 C M23
282 st8 [dp] = d2, 8 C M23
283 (p8) cmpeqor p9, p0 = -1, s3 C M I
284 (p8) add s3 = 1, s3 C M I
285 (p12) cmpeqor p13, p0 = 0, d3 C M I
286 (p12) add d3 = -1, d3 C M I
C NOTE(review): the stores of the final s3 and d3 are not visible here.
C
C Return value in r8: 2 if the final carry p9 is set, plus 1 if the final
C borrow p13 is set.
C NOTE(review): this relies on r8 being 0 whenever p9 is clear; that
C initialisation is not visible in this excerpt -- confirm.
289 (p9) mov r8 = 2 C M I
C Copy r2 back into ar.lc.
C NOTE(review): r2 presumably holds the ar.lc value saved at entry.
291 mov.i ar.lc = r2 C I0
292 (p13) add r8 = 1, r8 C M I
295 br.ret.sptk.many b0 C B