1 /* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
3 Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,
6 This file is part of the GNU MP Library.
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
13 The GNU MP Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
25 #include "mpn/sparc64/sparc64.h"
28 /* 64-bit divisor 32-bit divisor
29 cycles/limb cycles/limb
31 Ultrasparc 2i: 160 120
35 /* 32-bit divisors are treated in special-case code. This requires 4 mulx
36 per limb instead of the 8 needed in the general case.
38 For big-endian systems we need HALF_ENDIAN_ADJ included in the src[i]
39 addressing, to get the two halves of each limb read in the correct order.
40 The adjustment is kept in an adj variable. Doing that measures about 6 c/l faster
41 than just writing HALF_ENDIAN_ADJ(i) in the loop. The latter shouldn't
42 be 6 cycles' worth of work, but perhaps it doesn't schedule well (on gcc
45 A simple udivx/umulx loop for the 32-bit case was attempted for small
46 sizes, but at size==2 it was only about the same speed and at size==3 was
50 mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
52 int norm, norm_rshift;
53 mp_limb_t src_high_limb;
56 ASSERT (size_limbs >= 0);
59 if (UNLIKELY (size_limbs == 0))
62 src_high_limb = src_limbptr[size_limbs-1];
64 /* udivx is good for size==1, and no need to bother checking limb<divisor,
65 since if that's likely the caller should check */
66 if (UNLIKELY (size_limbs == 1))
67 return src_high_limb % d_limb;
69 if (d_limb <= CNST_LIMB(0xFFFFFFFF))
71 unsigned *src, n1, n0, r, dummy_q, nshift, norm_rmask;
75 size = 2 * size_limbs; /* halfwords */
76 src = (unsigned *) src_limbptr;
78 /* prospective initial remainder, if < d */
79 r = src_high_limb >> 32;
81 /* If the length of the source is uniformly distributed, then there's
82 a 50% chance of the high 32 bits being zero, in which case that half can be skipped. */
85 r = (unsigned) src_high_limb;
87 ASSERT (size > 0); /* because always even */
90 /* Skip a division if high < divisor. Having the test here before
91 normalizing will still skip as often as possible. */
95 ASSERT (size > 0); /* because size==1 handled above */
100 count_leading_zeros_32 (norm, d_limb);
104 norm_rshift = 32 - norm;
105 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
107 adj = HALF_ENDIAN_ADJ (i);
109 r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
111 invert_half_limb (dinv_limb, d_limb);
114 for (i--; i >= 0; i--)
118 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
119 udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
123 /* same as loop, but without n0 */
125 udiv_qrnnd_half_preinv (dummy_q, r, r, nshift, d_limb, dinv_limb);
127 ASSERT ((r & ((1 << norm) - 1)) == 0);
134 mp_limb_t n1, n0, r, dinv, dummy_q, nshift, norm_rmask;
138 r = src_high_limb; /* initial remainder */
140 /* Skip a division if high < divisor. Having the test here before
141 normalizing will still skip as often as possible. */
145 ASSERT (size > 0); /* because size==1 handled above */
150 count_leading_zeros (norm, d_limb);
153 norm_rshift = GMP_LIMB_BITS - norm;
154 norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
158 r = (r << norm) | ((n1 >> norm_rshift) & norm_rmask);
160 invert_limb (dinv, d_limb);
162 for (i = size-2; i >= 0; i--)
165 nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
166 udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
170 /* same as loop, but without n0 */
172 udiv_qrnnd_preinv (dummy_q, r, r, nshift, d_limb, dinv);
174 ASSERT ((r & ((CNST_LIMB(1) << norm) - 1)) == 0);