1 dnl IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
4 dnl Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
6 dnl This file is part of the GNU MP Library.
8 dnl The GNU MP Library is free software; you can redistribute it and/or modify
9 dnl it under the terms of the GNU Lesser General Public License as published
10 dnl by the Free Software Foundation; either version 3 of the License, or (at
11 dnl your option) any later version.
13 dnl The GNU MP Library is distributed in the hope that it will be useful, but
14 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 dnl License for more details.
18 dnl You should have received a copy of the GNU Lesser General Public License
19 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
21 include(`../config.m4')
28 C This was generated by gcc, then the loops were optimized. The preinv entry
29 C point was shoehorned into the file. Lots of things outside the loops could
30 C be streamlined. It would probably be a good idea to merge the loops for
31 C normalized and unnormalized divisor, since the shifting stuff is done for
32 C free in parallel with other operations. It would even be possible to merge
33 C all loops, if the ld8 were made conditional.
36 C * Consider delaying inversion for normalized mpn_divrem_1 entry till after
37 C computing leading limb.
38 C * Inline and interleave limb inversion code with loop setup code.
42 C HP's assembler requires these declarations for importing mpn_invert_limb
43 .global mpn_invert_limb
44 .type mpn_invert_limb,@function
52 C vlinv = r37 (preinv only)
53 C cnt = r38 (preinv only)
C Entry point for the variant whose caller passes two extra arguments,
C vlinv in r37 and cnt in r38, in addition to the five it shares with
C mpn_divrem_1.  The code below sets up pointers and counters and then
C branches to labels (.Lpunnorm, .Lpu) that are used by the common code.
55 PROLOGUE(mpn_preinv_divrem_1)
C Register frame: 7 inputs (r32-r38), 8 locals, 1 output, 0 rotating;
C the previous ar.pfs value is kept in r42.
58 alloc r42 = ar.pfs, 7, 8, 1, 0
C r34 += 8 * r35.  NOTE(review): presumably r34 = up and r35 = n, so this
C points past the most significant source limb -- confirm against the
C parameter list.
72 shladd r34 = r35, 3, r34
C r15 is computed elsewhere; per the comment it holds n + qxn here.
82 shladd r32 = r15, 3, r32 C r32 = rp + n + qxn
C Signed test: p8 is set when 0 <= r36, i.e. when the top bit of r36 is
C clear.  p0 as second target discards the complementary result.
83 cmp.le p8, p0 = 0, r36
85 adds r32 = -8, r32 C r32 = rp + n + qxn - 1
C Unsigned test: p6 = (r36 <= r39), p7 = the opposite.
C NOTE(review): r39 is presumably the high source limb loaded earlier -- confirm.
86 cmp.leu p6, p7 = r36, r39
C Top bit of r36 clear: go to the unnormalized-divisor setup (hinted not taken).
87 (p8) br.cond.dpnt .Lpunnorm
C When r36 <= r39, start the remainder as r39 - r36.
93 (p6) sub r38 = r39, r36
96 adds r35 = -2, r35 C un -= 2
C NOTE(review): the lines from here on look like the .Lpunnorm path; the
C label itself should be checked against the full file.
C When r36 <= r39, step r34 forward by one limb (8 bytes) and go straight
C to .Lpu (hinted taken).
100 (p6) add r34 = 8, r34
103 (p6) br.cond.dptk .Lpu
C Otherwise the remainder starts as r39 shifted left by r40.
C NOTE(review): r40 presumably holds a copy of cnt -- confirm.
105 shl r38 = r39, r40 C r = ahigh << cnt
C p8 = (r35 != 1), evaluated before the decrement below.
106 cmp.ne p8, p0 = 1, r35
108 adds r35 = -1, r35 C un--
C Branch to .Lpu unless un was exactly 1 before the decrement (hinted not taken).
109 (p8) br.cond.dpnt .Lpu
119 PROLOGUE(mpn_divrem_1)
122 alloc r42 = ar.pfs, 5, 8, 1, 0
138 cmp.ne p6, p7 = 0, r15
141 (p7) br.cond.dpnt .Lret
142 shladd r14 = r15, 3, r32 C r14 = rp + n + qxn
143 cmp.le p6, p7 = 0, r36
145 adds r32 = -8, r14 C r32 = rp + n + qxn - 1
146 (p6) br.cond.dpnt .Lunnorm
147 cmp.eq p6, p7 = 0, r35
148 (p6) br.cond.dpnt .L179
149 shladd r14 = r35, 3, r34
156 cmp.leu p6, p7 = r36, r38
158 (p6) addl r15 = 1, r0
162 (p6) sub r38 = r38, r36
167 br.call.sptk.many b0 = mpn_invert_limb
169 shladd r34 = r35, 3, r34
175 cmp.le p6, p7 = 0, r35
177 (p7) br.cond.dpnt .L435
183 C Develop quotient limbs for normalized divisor
184 .Loop1: C 00 C q=r18 nh=r38/f7
186 xma.hu f11 = f7, f6, f0
188 xma.l f8 = f11, f12, f7 C q = q + nh
191 xma.hu f9 = f8, f10, f0
192 xma.l f8 = f8, f10, f0
198 cmp.ltu p6, p7 = r20, r15
202 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
203 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
204 (p6) add r16 = -1, r16
205 (p0) cmp.ne.unc p6, p7 = r0, r0
207 (p8) cmp.ltu p6, p7 = r15, r36
208 (p8) sub r15 = r15, r36
209 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
211 .pred.rel "mutex",p6,p7
212 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
213 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
214 cmp.ltu p6, p7 = r15, r36 C speculative
215 sub r28 = r15, r36 C speculative, just for cmp
217 (p8) cmp.ltu p6, p7 = r28, r36 C redo last cmp if needed
219 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
221 (p6) setf.sig f7 = r15
222 (p7) sub r15 = r15, r36
223 (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
225 (p7) setf.sig f7 = r15
234 cmp.eq p6, p7 = 0, r35
235 (p6) br.cond.dpnt .L322
236 shladd r34 = r35, 3, r34
242 cmp.leu p6, p7 = r36, r39
243 (p6) br.cond.dptk .L322
248 cmp.ne p6, p7 = 1, r15
252 (p7) br.cond.dpnt .Lret
262 shladd r16 = r14, 3, r16
266 cmp.geu p6, p7 = 15, r14
268 (p7) shr.u r14 = r14, 4
269 (p7) adds r16 = 4, r16
271 cmp.geu p6, p7 = 3, r14
273 (p7) shr.u r14 = r14, 2
274 (p7) adds r16 = 2, r16
276 tbit.nz p6, p7 = r14, 1
278 .pred.rel "mutex",p6,p7
279 (p6) sub r40 = 62, r16
280 (p7) sub r40 = 63, r16
285 br.call.sptk.many b0 = mpn_invert_limb
292 cmp.eq p6, p7 = 0, r35
293 (p6) br.cond.dpnt .L435
298 cmp.le p6, p7 = 0, r35
303 (p7) br.cond.dpnt .Lend3
310 C Develop quotient limbs for unnormalized divisor
313 xma.hu f11 = f7, f6, f0
315 xma.l f8 = f11, f12, f7 C q = q + nh
318 xma.hu f9 = f8, f10, f0
320 xma.l f8 = f8, f10, f0
327 cmp.ltu p6, p7 = r20, r15
331 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
332 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
333 (p6) add r16 = -1, r16
334 (p0) cmp.ne.unc p6, p7 = r0, r0
336 (p8) cmp.ltu p6, p7 = r15, r36
337 (p8) sub r15 = r15, r36
338 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
340 .pred.rel "mutex",p6,p7
341 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
342 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
343 cmp.ltu p6, p7 = r15, r36 C speculative
344 sub r28 = r15, r36 C speculative, just for cmp
346 (p8) cmp.ltu p6, p7 = r28, r36 C redo last cmp if needed
348 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
350 (p6) setf.sig f7 = r15
351 (p7) sub r15 = r15, r36
352 (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
354 (p7) setf.sig f7 = r15
364 xma.hu f11 = f7, f6, f0
366 xma.l f8 = f11, f12, f7 C q = q + nh
369 xma.hu f9 = f8, f10, f0
371 xma.l f8 = f8, f10, f0
376 cmp.ltu p6, p7 = r20, r15
380 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
381 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
382 (p6) add r16 = -1, r16
383 (p0) cmp.ne.unc p6, p7 = r0, r0
385 (p8) cmp.ltu p6, p7 = r15, r36
386 (p8) sub r15 = r15, r36
387 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
389 .pred.rel "mutex",p6,p7
390 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
391 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
393 (p8) sub r15 = r15, r36
394 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
396 cmp.ltu p6, p7 = r15, r36
398 (p7) sub r15 = r15, r36
399 (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
405 cmp.le p6, p7 = 1, r33
406 (p7) br.cond.dpnt .Lend4
413 xma.hu f11 = f7, f6, f0
415 xma.l f8 = f11, f12, f7 C q = q + nh
418 xma.hu f9 = f8, f10, f0
419 xma.l f8 = f8, f10, f0
424 cmp.ltu p6, p7 = 0, r15
428 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0?
429 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0?
430 (p6) add r16 = -1, r16
431 (p0) cmp.ne.unc p6, p7 = r0, r0
433 (p8) cmp.ltu p6, p7 = r15, r36
434 (p8) sub r15 = r15, r36
435 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
437 .pred.rel "mutex",p6,p7
438 (p6) cmp.ne p8, p9 = 1, r16 C is rH != 0 still?
439 (p7) cmp.ne p8, p9 = 0, r16 C is rH != 0 still?
440 cmp.ltu p6, p7 = r15, r36 C speculative
441 sub r28 = r15, r36 C speculative, just for cmp
443 (p8) cmp.ltu p6, p7 = r28, r36 C redo last cmp if needed
445 (p8) add r18 = 1, r18 C q = q + 1; done if: rH > 0
447 (p6) setf.sig f7 = r15
448 (p7) sub r15 = r15, r36
449 (p7) add r18 = 1, r18 C q = q + 1; done if: rH > 0
451 (p7) setf.sig f7 = r15