1 /* __mpn_mul_n -- Multiply two natural numbers of length n.
3 Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
5 This file is part of the GNU MP Library.
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Library General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or (at your
10 option) any later version.
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
15 License for more details.
17 You should have received a copy of the GNU Library General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, write to
19 the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
24 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
25 both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
26 always stored. Return the most significant limb.
29 1. PRODP != UP and PRODP != VP, i.e. the destination
30 must be distinct from the multiplier and the multiplicand. */
32 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
33 value which is good on most machines. */
34 #ifndef KARATSUBA_THRESHOLD
35 #define KARATSUBA_THRESHOLD 32
38 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
39 #if KARATSUBA_THRESHOLD < 2
40 #undef KARATSUBA_THRESHOLD
41 #define KARATSUBA_THRESHOLD 2
46 ____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
51 /* Handle simple cases with traditional multiplication.
53 This is the most critical code of multiplication. All multiplies rely
54 on this, both small and huge. Small ones arrive here immediately. Huge
55 ones arrive here as this is the base case for Karatsuba's recursive
/* Basecase (schoolbook) multiplication: {prodp, 2*size} = {up, size} *
   {vp, size}.  O(size^2) limb operations.  This is the leaf of the
   Karatsuba recursion, so both small and huge multiplies end up here.
   NOTE(review): this chunk has lines elided by extraction — the ANSI and
   K&R parameter lists appear back to back and several guards, braces
   and declarations are missing.  Code lines are kept byte-identical.  */
60 ____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
/* Old-style (K&R) parameter list alternative of the same function.  */
62 ____mpn_mul_n_basecase (prodp, up, vp, size)
73 /* Multiply by the first limb in V separately, as the result can be
74 stored (not added) to PROD. We also avoid a loop for zeroing. */
/* v_limb == 1 fast path: the first partial product is just a copy of U
   (presumably guarded by an elided `v_limb` test — confirm against the
   full file).  */
79 MPN_COPY (prodp, up, size);
/* v_limb == 0 fast path: the first partial product is all zeros.  */
81 MPN_ZERO (prodp, size);
/* General first limb: prod = up * v_limb; carry limb returned.  */
85 cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
/* Store the carry as the most significant limb of this partial row.  */
87 prodp[size] = cy_limb;
90 /* For each iteration in the outer loop, multiply one limb from
91 U with one limb from V, and add it to PROD. */
92 for (i = 1; i < size; i++)
/* v_limb == 1 fast path inside the loop: a plain add of {up, size} is
   cheaper than a one-limb addmul.  PRODP presumably advances one limb
   per iteration (advance statement elided — confirm).  */
99 cy_limb = __mpn_add_n (prodp, prodp, up, size);
/* General case: accumulate up * v_limb into the product.  */
102 cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
104 prodp[size] = cy_limb;
/* Karatsuba multiplication: {prodp, 2*size} = {up, size} * {vp, size},
   using {tspace, 2*size} limbs of scratch.  PRODP must be distinct from
   UP and VP (see the restriction in the file header).  Recurses via
   MPN_MUL_N_RECURSE until size drops below KARATSUBA_THRESHOLD.
   NOTE(review): lines are elided by extraction (braces, declarations,
   the ANSI/K&R alternatives, if/else keywords); code lines are kept
   byte-identical.  */
111 ____mpn_mul_n (mp_ptr prodp,
112 mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
/* Old-style (K&R) parameter list alternative of the same function.  */
114 ____mpn_mul_n (prodp, up, vp, size, tspace)
124 /* The size is odd, the code below doesn't handle that.
125 Multiply the least significant (size - 1) limbs with a recursive
126 call, and handle the most significant limb of S1 and S2
128 /* A slightly faster way to do this would be to make the Karatsuba
129 code below behave as if the size were even, and let it check for
130 odd size in the end. I.e., in essence move this code to the end.
131 Doing so would save us a recursive call, and potentially make the
132 stack grow a lot less. */
134 mp_size_t esize = size - 1; /* even size */
/* prod[0 .. 2*esize-1] = {up, esize} * {vp, esize}.  */
137 MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
/* Add vp[esize] * {up, esize} at limb offset esize.  */
138 cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
139 prodp[esize + esize] = cy_limb;
/* Add up[esize] * {vp, size} — full SIZE limbs, so this also covers the
   up[esize]*vp[esize] cross term.  */
140 cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
142 prodp[esize + size] = cy_limb;
146 /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
148 Split U in two pieces, U1 and U0, such that
150 and V in V1 and V0, such that
153 UV is then computed recursively using the identity
156 UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
159 Where B = 2**BITS_PER_MP_LIMB. */
161 mp_size_t hsize = size >> 1;
165 /*** Product H. ________________ ________________
166 |_____U1 x V1____||____U0 x V0_____| */
167 /* Put result in upper part of PROD and pass low part of TSPACE
/* H = U1 * V1 into the upper half of PROD.  */
169 MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
171 /*** Product M. ________________
172 |_(U1-U0)(V0-V1)_| */
/* Compute |U1 - U0| into the low half of PROD; the sign is presumably
   tracked in an elided NEGFLG variable (see the comments below).  */
173 if (__mpn_cmp (up + hsize, up, hsize) >= 0)
175 __mpn_sub_n (prodp, up + hsize, up, hsize);
/* U0 > U1: subtract the other way and (presumably) flip NEGFLG.  */
180 __mpn_sub_n (prodp, up, up + hsize, hsize);
/* Compute |V1 - V0| into PROD at offset hsize, same sign tracking.  */
183 if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
185 __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
190 __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
191 /* No change of NEGFLG. */
193 /* Read temporary operands from low part of PROD.
194 Put result in low part of TSPACE using upper part of TSPACE
/* M = |U1-U0| * |V1-V0| into the low half of TSPACE.  */
196 MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
198 /*** Add/copy product H. */
/* Assemble H at offset hsize: copy its low half and add its halves in
   the upper part; CY accumulates the carries out of the middle.  */
199 MPN_COPY (prodp + hsize, prodp + size, hsize);
200 cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
202 /*** Add product M (if NEGFLG M is a negative number). */
/* M carries the sign (U1-U0)(V0-V1): subtract when the elided NEGFLG
   says M is negative, otherwise add.  */
204 cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
206 cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
208 /*** Product L. ________________ ________________
209 |________________||____U0 x V0_____| */
210 /* Read temporary operands from low part of PROD.
211 Put result in low part of TSPACE using upper part of TSPACE
/* L = U0 * V0 into the low half of TSPACE.  */
213 MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
215 /*** Add/copy Product L (twice). */
/* First contribution of L, at offset hsize.  */
217 cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
/* Propagate the accumulated middle carry (or borrow) into the top
   limbs; which of the two runs presumably depends on CY's sign test
   (guard elided).  */
221 __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
224 __mpn_sub_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
/* Second contribution of L, at offset 0: copy the low half and add the
   high half, propagating any final carry upward.  */
229 MPN_COPY (prodp, tspace, hsize);
230 cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
232 __mpn_add_1 (prodp + size, prodp + size, size, 1);
/* Basecase (schoolbook) squaring: {prodp, 2*size} = {up, size}^2.
   Structurally a copy of ____mpn_mul_n_basecase with VP replaced by UP
   (the comments still say "V"); O(size^2) limb operations.
   NOTE(review): lines elided by extraction — ANSI and K&R parameter
   lists appear back to back, guards and braces missing.  Code lines
   are kept byte-identical.  */
238 ____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
/* Old-style (K&R) parameter list alternative of the same function.  */
240 ____mpn_sqr_n_basecase (prodp, up, size)
250 /* Multiply by the first limb in V separately, as the result can be
251 stored (not added) to PROD. We also avoid a loop for zeroing. */
/* v_limb == 1 fast path: the first row is a plain copy (presumably
   behind an elided `v_limb` test — confirm against the full file).  */
256 MPN_COPY (prodp, up, size);
/* v_limb == 0 fast path: the first row is all zeros.  */
258 MPN_ZERO (prodp, size);
/* General first limb: prod = up * v_limb; carry limb returned.  */
262 cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
264 prodp[size] = cy_limb;
267 /* For each iteration in the outer loop, multiply one limb from
268 U with one limb from V, and add it to PROD. */
269 for (i = 1; i < size; i++)
/* v_limb == 1 fast path inside the loop: plain add beats a one-limb
   addmul.  PRODP presumably advances one limb per iteration (advance
   statement elided — confirm).  */
276 cy_limb = __mpn_add_n (prodp, prodp, up, size);
/* General case: accumulate up * v_limb into the product.  */
279 cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
281 prodp[size] = cy_limb;
/* Karatsuba squaring: {prodp, 2*size} = {up, size}^2, with
   {tspace, 2*size} limbs of scratch.  Same recursion scheme as
   ____mpn_mul_n, specialized for U == V; note the middle product
   (U1-U0)(U0-U1) is never positive, so it is always subtracted and no
   NEGFLG is needed.
   NOTE(review): lines elided by extraction (braces, declarations,
   ANSI/K&R alternatives, if/else keywords); code lines are kept
   byte-identical.  */
288 ____mpn_sqr_n (mp_ptr prodp,
289 mp_srcptr up, mp_size_t size, mp_ptr tspace)
/* Old-style (K&R) parameter list alternative of the same function.  */
291 ____mpn_sqr_n (prodp, up, size, tspace)
300 /* The size is odd, the code below doesn't handle that.
301 Multiply the least significant (size - 1) limbs with a recursive
302 call, and handle the most significant limb of S1 and S2
304 /* A slightly faster way to do this would be to make the Karatsuba
305 code below behave as if the size were even, and let it check for
306 odd size in the end. I.e., in essence move this code to the end.
307 Doing so would save us a recursive call, and potentially make the
308 stack grow a lot less. */
310 mp_size_t esize = size - 1; /* even size */
/* prod[0 .. 2*esize-1] = {up, esize}^2.  */
313 MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
/* Add up[esize] * {up, esize} at offset esize ...  */
314 cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
315 prodp[esize + esize] = cy_limb;
/* ... and again over full SIZE limbs, also covering up[esize]^2.  */
316 cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);
318 prodp[esize + size] = cy_limb;
322 mp_size_t hsize = size >> 1;
325 /*** Product H. ________________ ________________
326 |_____U1 x U1____||____U0 x U0_____| */
327 /* Put result in upper part of PROD and pass low part of TSPACE
/* H = U1^2 into the upper half of PROD.  */
329 MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
331 /*** Product M. ________________
332 |_(U1-U0)(U0-U1)_| */
/* Compute |U1 - U0| into the low half of PROD; no sign tracking needed
   since M = -(U1-U0)^2 is always subtracted below.  */
333 if (__mpn_cmp (up + hsize, up, hsize) >= 0)
335 __mpn_sub_n (prodp, up + hsize, up, hsize);
339 __mpn_sub_n (prodp, up, up + hsize, hsize);
342 /* Read temporary operands from low part of PROD.
343 Put result in low part of TSPACE using upper part of TSPACE
/* M = |U1-U0|^2 into the low half of TSPACE.  */
345 MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
347 /*** Add/copy product H. */
/* Assemble H at offset hsize; CY accumulates the middle carries.  */
348 MPN_COPY (prodp + hsize, prodp + size, hsize);
349 cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
351 /*** Add product M (if NEGFLG M is a negative number). */
/* M is always non-positive when squaring, so always subtract.  */
352 cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
354 /*** Product L. ________________ ________________
355 |________________||____U0 x U0_____| */
356 /* Read temporary operands from low part of PROD.
357 Put result in low part of TSPACE using upper part of TSPACE
/* L = U0^2 into the low half of TSPACE.  */
359 MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
361 /*** Add/copy Product L (twice). */
/* First contribution of L, at offset hsize.  */
363 cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
/* Propagate the accumulated carry (or borrow) into the top limbs; which
   of the two runs presumably depends on an elided sign test of CY.  */
367 __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
370 __mpn_sub_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
/* Second contribution of L, at offset 0: copy low half, add high half,
   propagate any final carry upward.  */
375 MPN_COPY (prodp, tspace, hsize);
376 cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
378 __mpn_add_1 (prodp + size, prodp + size, size, 1);
382 /* This should be made into an inline function in gmp.h. */
/* Public entry point: {prodp, 2*size} = {up, size} * {vp, size}.
   Dispatches to the squaring routines (presumably behind an elided
   `up == vp` test — confirm) and between basecase and Karatsuba on
   SIZE vs. KARATSUBA_THRESHOLD.  Scratch for the recursive routines is
   alloca'd: 2*size limbs on the stack, so callers must keep SIZE
   reasonably bounded.
   NOTE(review): guards, braces and declarations elided by extraction;
   code lines kept byte-identical.  */
385 __mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
/* Old-style (K&R) parameter list alternative of the same function.  */
387 __mpn_mul_n (prodp, up, vp, size)
/* Squaring path: small sizes use the O(n^2) basecase.  */
396 if (size < KARATSUBA_THRESHOLD)
398 ____mpn_sqr_n_basecase (prodp, up, size);
/* Large squaring: Karatsuba with stack-allocated scratch.  */
403 tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
404 ____mpn_sqr_n (prodp, up, size, tspace);
/* General multiplication path: same threshold dispatch.  */
409 if (size < KARATSUBA_THRESHOLD)
411 ____mpn_mul_n_basecase (prodp, up, vp, size);
416 tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
417 ____mpn_mul_n (prodp, up, vp, size, tspace);