/* mpn_sqr_basecase -- Internal routine to square a natural number
   of length n.

   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.

Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
2005, 2008 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#if HAVE_NATIVE_mpn_sqr_diagonal
#define MPN_SQR_DIAGONAL(rp, up, n)                                     \
  mpn_sqr_diagonal (rp, up, n)
#else
/* Generic fallback: square each limb and store the two-limb result at
   rp[2*i], rp[2*i+1].  The GMP_NAIL_BITS shifts are no-ops on builds
   without nails.  */
#define MPN_SQR_DIAGONAL(rp, up, n)                                     \
  do {                                                                  \
    mp_size_t _i;                                                       \
    for (_i = 0; _i < (n); _i++)                                        \
      {                                                                 \
        mp_limb_t ul, lpl;                                              \
        ul = (up)[_i];                                                  \
        umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);     \
        (rp)[2 * _i] = lpl >> GMP_NAIL_BITS;                            \
      }                                                                 \
  } while (0)
#endif
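
/* For example (a sketch, assuming 64-bit limbs and no nails): with n = 2
   and up[] = {3, 2}, MPN_SQR_DIAGONAL (rp, up, 2) stores u0^2 and u1^2 as
   consecutive double-limb values, giving rp[] = {9, 0, 4, 0}.  */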

#undef READY_WITH_mpn_sqr_basecase

#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);
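
  /* Plan: accumulate the cross products sum(i<j) u[i]*u[j]*B^(i+j) in tp[]
     with mpn_addmul_2s (which takes its two-limb multiplier from up + i),
     write the squares u[i]^2 into rp[] with MPN_SQR_DIAGONAL, then combine
     as rp += 2*tp.  The loop bounds differ for odd and even n because each
     call consumes two multiplier limbs.  */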

  if ((n & 1) != 0)
    {
      if (n == 1)
        {
          mp_limb_t ul, lpl;
          ul = up[0];
          umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
          rp[0] = lpl >> GMP_NAIL_BITS;
          return;
        }

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 2; i += 2)
        {
          cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
          tp[n + i] = cy;
        }
    }
  else
    {
      if (n == 2)
        {
          rp[0] = 0;
          rp[1] = 0;
          rp[3] = mpn_addmul_2 (rp, up, 2, up);
          return;
        }

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 4; i += 2)
        {
          cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
          tp[n + i] = cy;
        }
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;
    }

  MPN_SQR_DIAGONAL (rp, up, n);

  /* rp[] now holds the diagonal squares; add in twice the cross products.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif

#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2

/* mpn_sqr_basecase using plain mpn_addmul_2.

   This is tricky, since we have to let mpn_addmul_2 make some undesirable
   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
   This forces us to conditionally add or subtract the mpn_sqr_diagonal
   results.  Examples of the products we form:

   n = 4              n = 5                 n = 6
   u1u0 * u3u2u1      u1u0 * u4u3u2u1       u1u0 * u5u4u3u2u1
   u2 * u3            u3u2 * u4u3           u3u2 * u5u4u3
                                            u4 * u5

   add: u0 u2 u3      add: u0 u2 u4         add: u0 u2 u4 u5
   sub: u1            sub: u1 u3            sub: u1 u3  */
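
/* Reading the n = 4 column above: the mpn_addmul_2 calls already produce
   the diagonal term u1*u1 inside tp[], so twice that square arrives when
   2*tp is added at the end.  Negating u1^2 in rp[] beforehand (the "sub"
   row) cancels the excess, while the "add" row squares stand as written.
   The negation loops below implement that correction.  */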

void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      mp_limb_t x0, x1;

      if (n == 1)
        {
          mp_limb_t ul, lpl;
          ul = up[0];
          umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
          rp[0] = lpl >> GMP_NAIL_BITS;
          return;
        }

      /* The code below doesn't like unnormalized operands.  Since such
         operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
        {
          rp[2 * n - 2] = 0;
          rp[2 * n - 1] = 0;
          mpn_sqr_basecase (rp, up, n - 1);
          return;
        }

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 2; i += 2)
        {
          cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
          tp[n + i] = cy;
        }

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Negate the diagonal squares that must be subtracted (u1^2, u3^2,
         ...), propagating the borrow into the limbs above each pair.
         Adding cy before a pair is negated is equivalent to subtracting
         it after the negation.  */
      for (i = 2;; i += 4)
        {
          x0 = rp[i + 0];
          rp[i + 0] = (-x0) & GMP_NUMB_MASK;
          x1 = rp[i + 1];
          rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
          __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
          if (i + 4 == 2 * n)
            break;
          mpn_incr_u (rp + i + 4, cy);
        }
    }
  else
    {
      mp_limb_t x0, x1;

      if (n == 2)
        {
          rp[0] = 0;
          rp[1] = 0;
          rp[3] = mpn_addmul_2 (rp, up, 2, up);
          return;
        }

      /* The code below doesn't like unnormalized operands.  Since such
         operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
        {
          rp[2 * n - 2] = 0;
          rp[2 * n - 1] = 0;
          mpn_sqr_basecase (rp, up, n - 1);
          return;
        }

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 4; i += 2)
        {
          cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
          tp[n + i] = cy;
        }
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;

      MPN_SQR_DIAGONAL (rp, up, n);

      /* As above, negate the diagonal squares from the "sub" row.  For
         even n the topmost square is never negated, so the loop stops one
         pair earlier and the final borrow is applied separately.  */
      for (i = 2;; i += 4)
        {
          x0 = rp[i + 0];
          rp[i + 0] = (-x0) & GMP_NUMB_MASK;
          x1 = rp[i + 1];
          rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
          if (i + 6 == 2 * n)
            break;
          __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
          mpn_incr_u (rp + i + 4, cy);
        }
      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    }

#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif

#if ! defined (READY_WITH_mpn_sqr_basecase)

/* Default mpn_sqr_basecase using mpn_addmul_1.  */
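
/* All three variants rely on the same identity: writing u = sum(i) u_i*B^i
   in limb base B,

     u^2 = sum(i) u_i^2 * B^(2i)  +  2 * sum(i<j) u_i*u_j * B^(i+j),

   i.e. the diagonal squares plus twice the triangular cross products.  */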

void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }
  if (n > 1)
    {
      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
      mp_ptr tp = tarr;
      mp_limb_t cy;

      /* must fit 2*n limbs in tarr */
      ASSERT (n <= SQR_TOOM2_THRESHOLD);

      /* Cross products: tp[] collects sum(i<j) u_i*u_j*B^(i+j), one
         mpn_addmul_1 row at a time.  */
      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
        {
          cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
          tp[n + i - 2] = cy;
        }

      /* u0^2 is already in rp[0..1]; square the remaining limbs.  */
      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

#if HAVE_NATIVE_mpn_addlsh1_n
      cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
      cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
      cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
      rp[2 * n - 1] += cy;
    }
}
#endif
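
/* A minimal usage sketch (illustrative only, not part of the library):
   mpn_sqr_basecase has an internal interface, so portable code should
   reach it through documented entry points such as mpn_mul_n.  Assuming a
   build where this file is compiled in, a direct check could look like
   the hypothetical helper below.  */
#if 0
#include <stdio.h>
static void
sqr_basecase_check (void)
{
  mp_limb_t up[3] = { 123, 456, 789 };
  mp_limb_t rp[6], ref[6];
  int i;

  mpn_sqr_basecase (rp, up, 3);   /* square via this routine */
  mpn_mul_n (ref, up, up, 3);     /* reference result via general multiply */

  for (i = 0; i < 6; i++)
    if (rp[i] != ref[i])
      printf ("mismatch at limb %d\n", i);
}
#endif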