1 /* mpn_toom33_mul -- Multiply {ap,an} and {bp,bn} where an and bn are close in
2 size. Or more accurately, bn <= an < (3/2)bn.
4 Contributed to the GNU project by Torbjorn Granlund.
5 Additional improvements by Marco Bodrato.
7 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
8 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
9 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
11 Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
13 This file is part of the GNU MP Library.
15 The GNU MP Library is free software; you can redistribute it and/or modify
16 it under the terms of the GNU Lesser General Public License as published by
17 the Free Software Foundation; either version 3 of the License, or (at your
18 option) any later version.
20 The GNU MP Library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
23 License for more details.
25 You should have received a copy of the GNU Lesser General Public License
26 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
32 /* Evaluate in: -1, 0, +1, +2, +inf
34 <-s--><--n--><--n-->
35 ____ ______ ______
36 |_a2_|___a1_|___a0_|
37 |b2_|___b1_|___b0_|
38 <-t-><--n--><--n-->
40 v0 = a0 * b0 # A(0)*B(0)
41 v1 = (a0+ a1+ a2)*(b0+ b1+ b2) # A(1)*B(1) ah <= 2 bh <= 2
42 vm1 = (a0- a1+ a2)*(b0- b1+ b2) # A(-1)*B(-1) |ah| <= 1 |bh| <= 1
43 v2 = (a0+2a1+4a2)*(b0+2b1+4b2) # A(2)*B(2) ah <= 6 bh <= 6
44 vinf= a2 * b2 # A(inf)*B(inf)
47 #if TUNE_PROGRAM_BUILD
48 #define MAYBE_mul_basecase 1
49 #define MAYBE_mul_toom33 1
51 #define MAYBE_mul_basecase \
52 (MUL_TOOM33_THRESHOLD < 3 * MUL_TOOM22_THRESHOLD)
53 #define MAYBE_mul_toom33 \
54 (MUL_TOOM44_THRESHOLD >= 3 * MUL_TOOM33_THRESHOLD)
57 /* FIXME: TOOM33_MUL_N_REC is not quite right for a balanced
58 multiplication at the infinity point. We may have
59 MAYBE_mul_basecase == 0, and still get s just below
60 MUL_TOOM22_THRESHOLD. If MUL_TOOM33_THRESHOLD == 7, we can even get
61 s == 1 and mpn_toom22_mul will crash.
64 #define TOOM33_MUL_N_REC(p, a, b, n, ws) \
66 if (MAYBE_mul_basecase \
67 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
68 mpn_mul_basecase (p, a, n, b, n); \
69 else if (! MAYBE_mul_toom33 \
70 || BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
71 mpn_toom22_mul (p, a, n, b, n, ws); \
73 mpn_toom33_mul (p, a, n, b, n, ws); \
77 mpn_toom33_mul (mp_ptr pp,
78 mp_srcptr ap, mp_size_t an,
79 mp_srcptr bp, mp_size_t bn,
86 mp_ptr as1, asm1, as2;
87 mp_ptr bs1, bsm1, bs2;
96 n = (an + 2) / (size_t) 3;
103 ASSERT (0 < s && s <= n);
104 ASSERT (0 < t && t <= n);
106 as1 = scratch + 4 * n + 4;
107 asm1 = scratch + 2 * n + 2;
111 bsm1 = scratch + 3 * n + 3; /* we need 4n+4 <= 4n+s+t */
112 bs2 = pp + 2 * n + 2;
118 /* Compute as1 and asm1. */
119 cy = mpn_add (gp, a0, n, a2, s);
120 #if HAVE_NATIVE_mpn_add_n_sub_n
121 if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
123 cy = mpn_add_n_sub_n (as1, asm1, a1, gp, n);
130 cy2 = mpn_add_n_sub_n (as1, asm1, gp, a1, n);
131 as1[n] = cy + (cy2 >> 1);
132 asm1[n] = cy - (cy & 1);
135 as1[n] = cy + mpn_add_n (as1, gp, a1, n);
136 if (cy == 0 && mpn_cmp (gp, a1, n) < 0)
138 mpn_sub_n (asm1, a1, gp, n);
144 cy -= mpn_sub_n (asm1, gp, a1, n);
150 #if HAVE_NATIVE_mpn_rsblsh1_n
151 cy = mpn_add_n (as2, a2, as1, s);
153 cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
155 cy = 2 * cy + mpn_rsblsh1_n (as2, a0, as2, n);
157 #if HAVE_NATIVE_mpn_addlsh1_n
158 cy = mpn_addlsh1_n (as2, a1, a2, s);
160 cy = mpn_add_1 (as2 + s, a1 + s, n - s, cy);
161 cy = 2 * cy + mpn_addlsh1_n (as2, a0, as2, n);
163 cy = mpn_add_n (as2, a2, as1, s);
165 cy = mpn_add_1 (as2 + s, as1 + s, n - s, cy);
167 cy = 2 * cy + mpn_lshift (as2, as2, n, 1);
168 cy -= mpn_sub_n (as2, as2, a0, n);
173 /* Compute bs1 and bsm1. */
174 cy = mpn_add (gp, b0, n, b2, t);
175 #if HAVE_NATIVE_mpn_add_n_sub_n
176 if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
178 cy = mpn_add_n_sub_n (bs1, bsm1, b1, gp, n);
185 cy2 = mpn_add_n_sub_n (bs1, bsm1, gp, b1, n);
186 bs1[n] = cy + (cy2 >> 1);
187 bsm1[n] = cy - (cy & 1);
190 bs1[n] = cy + mpn_add_n (bs1, gp, b1, n);
191 if (cy == 0 && mpn_cmp (gp, b1, n) < 0)
193 mpn_sub_n (bsm1, b1, gp, n);
199 cy -= mpn_sub_n (bsm1, gp, b1, n);
205 #if HAVE_NATIVE_mpn_rsblsh1_n
206 cy = mpn_add_n (bs2, b2, bs1, t);
208 cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
210 cy = 2 * cy + mpn_rsblsh1_n (bs2, b0, bs2, n);
212 #if HAVE_NATIVE_mpn_addlsh1_n
213 cy = mpn_addlsh1_n (bs2, b1, b2, t);
215 cy = mpn_add_1 (bs2 + t, b1 + t, n - t, cy);
216 cy = 2 * cy + mpn_addlsh1_n (bs2, b0, bs2, n);
218 cy = mpn_add_n (bs2, bs1, b2, t);
220 cy = mpn_add_1 (bs2 + t, bs1 + t, n - t, cy);
222 cy = 2 * cy + mpn_lshift (bs2, bs2, n, 1);
223 cy -= mpn_sub_n (bs2, bs2, b0, n);
228 ASSERT (as1[n] <= 2);
229 ASSERT (bs1[n] <= 2);
230 ASSERT (asm1[n] <= 1);
231 ASSERT (bsm1[n] <= 1);
232 ASSERT (as2[n] <= 6);
233 ASSERT (bs2[n] <= 6);
235 #define v0 pp /* 2n */
236 #define v1 (pp + 2 * n) /* 2n+1 */
237 #define vinf (pp + 4 * n) /* s+t */
238 #define vm1 scratch /* 2n+1 */
239 #define v2 (scratch + 2 * n + 1) /* 2n+2 */
240 #define scratch_out (scratch + 5 * n + 5)
242 /* vm1, 2n+1 limbs */
243 #ifdef SMALLER_RECURSION
244 TOOM33_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);
247 cy = bsm1[n] + mpn_add_n (vm1 + n, vm1 + n, bsm1, n);
249 cy += mpn_add_n (vm1 + n, vm1 + n, asm1, n);
252 TOOM33_MUL_N_REC (vm1, asm1, bsm1, n + 1, scratch_out);
255 TOOM33_MUL_N_REC (v2, as2, bs2, n + 1, scratch_out); /* v2, 2n+1 limbs */
257 /* vinf, s+t limbs */
258 if (s > t) mpn_mul (vinf, a2, s, b2, t);
259 else TOOM33_MUL_N_REC (vinf, a2, b2, s, scratch_out);
261 vinf0 = vinf[0]; /* v1 overlaps with this */
263 #ifdef SMALLER_RECURSION
265 TOOM33_MUL_N_REC (v1, as1, bs1, n, scratch_out);
268 cy = bs1[n] + mpn_add_n (v1 + n, v1 + n, bs1, n);
270 else if (as1[n] != 0)
272 #if HAVE_NATIVE_mpn_addlsh1_n
273 cy = 2 * bs1[n] + mpn_addlsh1_n (v1 + n, v1 + n, bs1, n);
275 cy = 2 * bs1[n] + mpn_addmul_1 (v1 + n, bs1, n, CNST_LIMB(2));
282 cy += mpn_add_n (v1 + n, v1 + n, as1, n);
284 else if (bs1[n] != 0)
286 #if HAVE_NATIVE_mpn_addlsh1_n
287 cy += mpn_addlsh1_n (v1 + n, v1 + n, as1, n);
289 cy += mpn_addmul_1 (v1 + n, as1, n, CNST_LIMB(2));
295 TOOM33_MUL_N_REC (v1, as1, bs1, n + 1, scratch_out);
299 TOOM33_MUL_N_REC (v0, ap, bp, n, scratch_out); /* v0, 2n limbs */
301 mpn_toom_interpolate_5pts (pp, v2, vm1, n, s + t, vm1_neg, vinf0);