1 dnl Alpha mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
3 dnl Copyright 2007, 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
28 C * Perhaps inline mpn_invert_limb, that would allow us to not save/restore
29 C any registers (thus save ~10 cycles per call).
30 C * Use negated d1 and/or d0 to speed carry propagation. Might save a cycle or two.
32 C * Check cluster delays (for ev6). We very likely could save some cycles.
33 C * Use branch-free code for computing di.
34 C * CAVEAT: We rely on r19 (un_param) not being clobbered by the mpn_invert_limb call.
C m4 register aliases: up_param expands to r18 and un_param expands to r19.
39 define(`up_param', `r18')
40 define(`un_param', `r19')
44 PROLOGUE(mpn_divrem_2)
65 bis r31, r31, r3 C most_significant_q_limb = 0
75 lda r3, 1(r31) C most_significant_q_limb = 1
76 L(L8): stq r3, 72(r30)
82 jsr r26, mpn_invert_limb
84 mulq r0, r12, r4 C t0 = LO(di * d1)
85 umulh r0, r10, r2 C s1 = HI(di * d0)
86 addq r4, r10, r4 C t0 += d0
87 cmpule r10, r4, r7 C r7 = (d0 <= t0), i.e. NOT (t0 < d0)
88 addq r4, r2, r4 C t0 += s1
90 subq r1, r7, r7 C t1 (-1, 0, or 1)
94 cmpult r4, r12, r1 C cy for: t0 -= d1 (below)
95 subq r7, r1, r7 C t1 -= cy
96 subq r4, r12, r4 C t0 -= d1
103 mulq r11, r0, r5 C q0 (early)
104 umulh r11, r0, r6 C q (early)
105 addq r5, r9, r8 C q0 += n1
106 addq r6, r11, r6 C q += n2
107 cmpult r8, r5, r1 C cy for: q0 += n1
108 addq r6, r1, r6 C q += cy
110 mulq r12, r6, r1 C LO(d1 * q)
111 umulh r10, r6, r7 C t1 = HI(d0 * q)
112 subq r9, r1, r9 C n1 -= LO(d1 * q)
113 mulq r10, r6, r4 C t0 = LO(d0 * q)
115 cmple r15, r19, r5 C condition and n0...
119 L(L31): subq r9, r12, r9 C n1 -= d1
122 subq r5, r10, r5 C n0 -= d0
123 subq r9, r7, r9 C n1 -= t1
126 subq r5, r4, r5 C n0 -= t0
127 cmpult r2, r8, r1 C (n1 < q0)
128 addq r6, r1, r6 C q += cond
129 lda r1, -1(r1) C -(n1 >= q0)
131 addq r5, r4, r9 C n0 += mask & d0
133 cmpult r9, r5, r11 C cy for: n0 += mask & d0
134 addq r2, r1, r1 C n1 += mask & d1
135 addq r1, r11, r11 C n1 += cy
136 cmpult r11, r12, r1 C
138 L(bck): stq r6, 0(r16)
143 L(L10): stq r9, 8(r13)
157 L(fix): cmpule r11, r12, r1