1 /* mpn_dcpi1_divappr_q -- divide-and-conquer division, returning approximate
2 quotient. The quotient returned is either correct, or one too large.
4 Contributed to the GNU project by Torbjorn Granlund.
6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
10 Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
33 mpn_dcpi1_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
34 gmp_pi1_t *dinv, mp_ptr tp)
39 lo = n >> 1; /* floor(n/2) */
40 hi = n - lo; /* ceil(n/2) */
42 if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
43 qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
45 qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);
47 mpn_mul (tp, qp + lo, hi, dp, lo);
49 cy = mpn_sub_n (np + lo, np + lo, tp, n);
51 cy += mpn_sub_n (np + n, np + n, dp, lo);
55 qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
56 cy -= mpn_add_n (np + lo, np + lo, dp, n);
59 if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
60 ql = mpn_sbpi1_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
62 ql = mpn_dcpi1_divappr_q_n (qp, np + hi, dp + hi, lo, dinv, tp);
64 if (UNLIKELY (ql != 0))
67 for (i = 0; i < lo; i++)
68 qp[i] = GMP_NUMB_MASK;
75 mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
76 mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv)
79 mp_limb_t qh, cy, qsave;
87 ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
96 qn++; /* pretend we'll need an extra limb */
97 /* Reduce qn mod dn without division, optimizing small operations. */
102 qp -= qn; /* point at low limb of next quotient block */
103 np -= qn; /* point in the middle of partial remainder */
105 tp = TMP_SALLOC_LIMBS (dn);
107 /* Perform the typically smaller block first. */
110 mp_limb_t q, n2, n1, n0, d1, d0;
112 /* Handle qh up front, for simplicity. */
113 qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0;
115 ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn));
117 /* A single iteration of schoolbook: One 3/2 division,
118 followed by the bignum update and adjustment. */
125 ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0));
127 if (UNLIKELY (n2 == d1) && n1 == d0)
130 cy = mpn_submul_1 (np - dn, dp - dn, dn, q);
135 udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32);
140 cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q);
143 n0 = (n0 - cy) & GMP_NUMB_MASK;
145 n1 = (n1 - cy1) & GMP_NUMB_MASK;
148 if (UNLIKELY (cy != 0))
150 n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1);
152 q = (q - 1) & GMP_NUMB_MASK;
165 qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2);
166 else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
167 qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32);
169 qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp);
174 mpn_mul (tp, qp, qn, dp - dn, dn - qn);
176 mpn_mul (tp, dp - dn, dn - qn, qp, qn);
178 cy = mpn_sub_n (np - dn, np - dn, tp, dn);
180 cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn);
184 qh -= mpn_sub_1 (qp, qp, qn, 1);
185 cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn);
189 qn = nn - dn - qn + 1;
194 mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp);
198 /* Since we pretended we'd need an extra quotient limb before, we now
199 have made sure the code above left just dn-1=qn quotient limbs to
200 develop. Develop that plus a guard limb. */
205 mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp);
206 MPN_COPY_INCR (qp, qp + 1, qn);
212 #if 0 /* not possible since we demand nn > dn */
215 qh = mpn_cmp (np - dn, dp - dn, dn) >= 0;
217 mpn_sub_n (np - dn, np - dn, dp - dn, dn);
223 qp -= qn; /* point at low limb of next quotient block */
224 np -= qn; /* point in the middle of partial remainder */
226 q2p = TMP_SALLOC_LIMBS (qn + 1);
227 /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on
228 callers not to be silly? */
229 if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD))
231 qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1),
232 dp - (qn + 1), qn + 1, dinv->inv32);
236 /* It is tempting to use qp for recursive scratch and put quotient in
237 tp, but the recursive scratch needs one limb too many. */
238 tp = TMP_SALLOC_LIMBS (qn + 1);
239 qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp);
241 MPN_COPY (qp, q2p + 1, qn);