1 /* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
3 Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 #include "mpn/sparc64/sparc64.h"
28 /* 64-bit divisor 32-bit divisor
29 cycles/limb cycles/limb
31 integer fraction integer fraction
32 Ultrasparc 2i: 160 160 122 96
36 /* 32-bit divisors are treated in special case code. This requires 4 mulx
37 per limb instead of 8 in the general case.
39 For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
40 addressing, to get the two halves of each limb read in the correct order.
41 This is kept in an adj variable. Doing that measures about 4 c/l faster
42 than just writing HALF_ENDIAN_ADJ(i) in the integer loop. The latter
43 shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
44 (on gcc 3.2.1 at least). The fraction loop doesn't seem affected, but we
45 still use a variable since that ought to work out best. */
48 mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
49 mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
51 mp_size_t total_size_limbs;
54 ASSERT (xsize_limbs >= 0);
55 ASSERT (size_limbs >= 0);
57 /* FIXME: What's the correct overlap rule when xsize!=0? */
58 ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
59 ap_limbptr, size_limbs));
61 total_size_limbs = size_limbs + xsize_limbs;
62 if (UNLIKELY (total_size_limbs == 0))
65 /* udivx is good for total_size==1, and no need to bother checking
66 limb<divisor, since if that's likely the caller should check */
67 if (UNLIKELY (total_size_limbs == 1))
70 a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
76 if (d_limb <= CNST_LIMB(0xFFFFFFFF))
78 mp_size_t size, xsize, total_size, adj;
79 unsigned *qp, n1, n0, q, r, nshift, norm_rmask;
82 int norm, norm_rshift;
84 size = 2 * size_limbs;
85 xsize = 2 * xsize_limbs;
86 total_size = size + xsize;
88 ap = (unsigned *) ap_limbptr;
89 qp = (unsigned *) qp_limbptr;
92 r = 0; /* initial remainder */
94 if (LIKELY (size != 0))
96 n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
98 /* If the length of the source is uniformly distributed, then
99 there's a 50% chance of the high 32-bits being zero, which we
103 n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
106 ASSERT (size > 0); /* because always even */
107 qp[size + HALF_ENDIAN_ADJ(1)] = 0;
110 /* Skip a division if high < divisor (high quotient 0). Testing
111 here before normalizing will still skip as often as
117 qp[size + HALF_ENDIAN_ADJ(size)] = 0;
124 count_leading_zeros_32 (norm, d_limb);
129 norm_rshift = 32 - norm;
130 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
132 invert_half_limb (dinv_limb, d_limb);
134 if (LIKELY (size != 0))
137 adj = HALF_ENDIAN_ADJ (i);
140 r |= ((n1 >> norm_rshift) & norm_rmask);
145 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
146 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
151 udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
152 qp[0 + HALF_ENDIAN_ADJ(0)] = q;
155 adj = HALF_ENDIAN_ADJ (0);
156 for (i = xsize-1; i >= 0; i--)
158 udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
169 mp_size_t size, xsize, total_size;
170 mp_limb_t d, n1, n0, q, r, dinv, nshift, norm_rmask;
171 int norm, norm_rshift;
177 total_size = total_size_limbs;
180 qp += total_size; /* above high limb */
181 r = 0; /* initial remainder */
183 if (LIKELY (size != 0))
185 /* Skip a division if high < divisor (high quotient 0). Testing
186 here before normalizing will still skip as often as
200 count_leading_zeros (norm, d);
204 norm_rshift = GMP_LIMB_BITS - norm;
205 norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
207 invert_limb (dinv, d);
209 if (LIKELY (size != 0))
212 r |= ((n1 >> norm_rshift) & norm_rmask);
213 for (i = size-2; i >= 0; i--)
216 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
217 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
222 udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
225 for (i = 0; i < xsize; i++)
227 udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);