1 dnl Alpha mpn_bdiv_dbm1c.
3 dnl Copyright 2008 Free Software Foundation, Inc.
5 dnl This file is part of the GNU MP Library.
7 dnl The GNU MP Library is free software; you can redistribute it and/or modify
8 dnl it under the terms of the GNU Lesser General Public License as published
9 dnl by the Free Software Foundation; either version 3 of the License, or (at
10 dnl your option) any later version.
12 dnl The GNU MP Library is distributed in the hope that it will be useful, but
13 dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 dnl License for more details.
17 dnl You should have received a copy of the GNU Lesser General Public License
18 dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
20 include(`../config.m4')
28 C * Try less unrolling; 2-way should give the same performance.
29 C * Optimize feed-in and wind-down code for speed, and perhaps further for code size.
31 C * This runs optimally given the algorithm: r8 is on a 3-operation recurrence
32 C path. We have not tried very hard to find a better algorithm. Perhaps
33 C it would be a good task for the GNU superoptimizer.
44 PROLOGUE(mpn_bdiv_dbm1c)
61 mulq r24, r19, r5 C U1
62 umulh r24, r19, r21 C U1
64 umulh r2, r19, r22 C U1
66 umulh r3, r19, r23 C U1
70 L(gt3): ldq r0, 24(r17)
71 mulq r24, r19, r5 C U1
72 umulh r24, r19, r21 C U1
75 umulh r2, r19, r22 C U1
78 umulh r3, r19, r23 C U1
91 mulq r24, r19, r6 C U1
92 umulh r24, r19, r22 C U1
94 umulh r3, r19, r23 C U1
98 L(gt2): ldq r0, 16(r17)
100 mulq r24, r19, r6 C U1
101 umulh r24, r19, r22 C U1
103 mulq r3, r19, r7 C U1
104 umulh r3, r19, r23 C U1
108 mulq r0, r19, r4 C U1
109 umulh r0, r19, r20 C U1
113 mulq r1, r19, r5 C U1
116 L(gt6): ldq r0, 0(r17)
117 mulq r1, r19, r5 C U1
121 L(b1): bgt r18, L(gt1)
123 mulq r24, r19, r7 C U1
124 umulh r24, r19, r23 C U1
128 L(gt1): ldq r0, 8(r17)
131 mulq r24, r19, r7 C U1
132 umulh r24, r19, r23 C U1
136 mulq r0, r19, r4 C U1
137 umulh r0, r19, r20 C U1
141 mulq r1, r19, r5 C U1
142 umulh r1, r19, r21 C U1
143 mulq r2, r19, r6 C U1
146 L(gt5): ldq r0, 0(r17)
147 mulq r1, r19, r5 C U1
148 umulh r1, r19, r21 C U1
150 mulq r2, r19, r6 C U1
154 L(b0): ldq r1, 8(r17)
159 mulq r24, r19, r4 C U1
160 umulh r24, r19, r20 C U1
163 mulq r1, r19, r5 C U1
164 umulh r1, r19, r21 C U1
165 mulq r2, r19, r6 C U1
166 umulh r2, r19, r22 C U1
167 mulq r3, r19, r7 C U1
170 L(gt4): ldq r0, 0(r17)
171 mulq r1, r19, r5 C U1
172 umulh r1, r19, r21 C U1
174 mulq r2, r19, r6 C U1
175 umulh r2, r19, r22 C U1
177 mulq r3, r19, r7 C U1
180 C *** MAIN LOOP START ***
182 L(top): mulq r0, r19, r4 C U1
184 L(L3): umulh r0, r19, r20 C U1
191 mulq r1, r19, r5 C U1
193 L(L2): umulh r1, r19, r21 C U1
200 mulq r2, r19, r6 C U1
202 L(L1): umulh r2, r19, r22 C U1
209 mulq r3, r19, r7 C U1
211 L(L0): umulh r3, r19, r23 C U1
222 C *** MAIN LOOP END ***
224 mulq r0, r19, r4 C U1
226 L(cj7): umulh r0, r19, r20 C U1
231 mulq r1, r19, r5 C U1
233 L(cj6): umulh r1, r19, r21 C U1
238 mulq r2, r19, r6 C U1
240 L(cj5): umulh r2, r19, r22 C U1
245 mulq r3, r19, r7 C U1
247 L(cj4): umulh r3, r19, r23 C U1
253 L(cj3): cmpult r8, r5, r28
258 L(cj2): cmpult r8, r6, r28
263 L(cj1): cmpult r8, r7, r28