1 /* mulmod_bnm1.c -- multiplication mod B^n-1.
3 Contributed to the GNU project by Niels Möller, Torbjorn Granlund and
6 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
7 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
8 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
10 Copyright 2009, 2010 Free Software Foundation, Inc.
12 This file is part of the GNU MP Library.
14 The GNU MP Library is free software; you can redistribute it and/or modify
15 it under the terms of the GNU Lesser General Public License as published by
16 the Free Software Foundation; either version 3 of the License, or (at your
17 option) any later version.
19 The GNU MP Library is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
21 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
22 License for more details.
24 You should have received a copy of the GNU Lesser General Public License
25 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
32 /* Inputs are {ap,rn} and {bp,rn}; output is {rp,rn}, computation is
33 mod B^rn - 1, and values are semi-normalised; zero is represented
34 as either 0 or B^rn - 1. Needs a scratch of 2rn limbs at tp.
   NOTE(review): this extract is elided -- the return type line, the
   trailing "mp_ptr tp)" parameter, the braces and the declaration of
   cy are not visible here. */
37 mpn_bc_mulmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
/* Plain full product: {tp, 2rn} <- {ap, rn} * {bp, rn}. */
44 mpn_mul_n (tp, ap, bp, rn);
/* Wrap-around reduction: B^rn == 1 (mod B^rn - 1), so the high half of
   the product is added back onto the low half; cy is the carry out. */
45 cy = mpn_add_n (rp, tp, tp + rn, rn);
46 /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
47 * be no overflow when adding in the carry. */
48 MPN_INCR_U (rp, rn, cy);
52 /* Inputs are {ap,rn+1} and {bp,rn+1}; output is {rp,rn+1}, in
53 semi-normalised representation, computation is mod B^rn + 1. Needs
54 a scratch area of 2rn + 2 limbs at tp; tp == rp is allowed.
55 Output is normalised.
   NOTE(review): elided extract -- the return type line, the trailing
   "mp_ptr tp)" parameter, the braces and the declaration of cy are
   not visible here. */
57 mpn_bc_mulmod_bnp1 (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t rn,
/* Full product of the (rn+1)-limb operands into the scratch area. */
64 mpn_mul_n (tp, ap, bp, rn + 1);
/* Both inputs are < B^rn + 1, so the topmost product limb must be
   zero and tp[2rn] cannot be saturated (asserted below). */
65 ASSERT (tp[2*rn+1] == 0);
66 ASSERT (tp[2*rn] < GMP_NUMB_MAX);
/* Wrap-around reduction: B^rn == -1 (mod B^rn + 1), so the high part
   is subtracted from the low part.  cy collects the tp[2rn]*B^2rn
   term (== tp[2rn] mod B^rn + 1) plus the borrow compensation; the
   increment below folds it in. */
67 cy = tp[2*rn] + mpn_sub_n (rp, tp, tp+rn, rn);
69 MPN_INCR_U (rp, rn+1, cy );
73 /* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1)
75 * The result is expected to be ZERO if and only if one of the operand
76 * already is. Otherwise the class [0] Mod(B^rn-1) is represented by
77 * B^rn-1. This should not be a problem if mulmod_bnm1 is used to
78 * combine results and obtain a natural number when one knows in
79 * advance that the final value is less than (B^rn-1).
80 * Moreover it should not be a problem if mulmod_bnm1 is used to
81 * compute the full product with an+bn <= rn, because this condition
82 * implies (B^an-1)(B^bn-1) < (B^rn-1) .
84 * Requires 0 < bn <= an <= rn and an + bn > rn/2
85 * Scratch need: rn + (need for recursive call OR rn + 4). This gives
87 * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4
   *
   * NOTE(review): this extract is heavily elided -- declarations
   * (cy, n, a0/a1, b0/b1, am1/bm1, ap1/bp1, anm/bnm, anp/bnp, k,
   * mask, hi, so), many braces and several statements are missing
   * from the visible text. */
90 mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp)
/* Base cases: an odd rn cannot be split in half, and small rn is
   faster with the quadratic basecase routine. */
96 if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD))
/* Operands shorter than rn limbs: compute a plain product and fold
   only the part (if any) that spills past rn limbs. */
98 if (UNLIKELY (bn < rn))
100 if (UNLIKELY (an + bn <= rn))
/* Product fits in rn limbs: it is already reduced mod B^rn - 1. */
102 mpn_mul (rp, ap, an, bp, bn);
/* Product spills: add the (an+bn-rn)-limb excess back onto the low
   part, since B^rn == 1 (mod B^rn - 1). */
107 mpn_mul (tp, ap, an, bp, bn);
108 cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn);
109 MPN_INCR_U (rp, rn, cy);
/* Both operands are full rn limbs: dedicated basecase. */
113 mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp);
/* Recursive case (rn = 2n): use B^rn - 1 = (B^n - 1)(B^n + 1) and
   recombine the two half-size residues by CRT below. */
123 /* We need at least an + bn >= n, to be able to fit one of the
124 recursive products at rp. Requiring strict inequality makes
125 the code slightly simpler. If desired, we could avoid this
126 restriction by initially halving rn as long as rn is even and
129 ASSERT (an + bn > n);
131 /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1)
134 x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)]
142 #define xp tp /* 2n + 2 */
143 /* am1 maybe in {xp, n} */
144 /* bm1 maybe in {xp + n, n} */
145 #define sp1 (tp + 2*n + 2)
146 /* ap1 maybe in {sp1, n + 1} */
147 /* bp1 maybe in {sp1 + n + 1, n + 1} */
/* am1 = a mod (B^n - 1): fold the high limbs of a onto the low n. */
157 cy = mpn_add (xp, a0, n, a1, an - n);
158 MPN_INCR_U (xp, n, cy);
/* bm1 = b mod (B^n - 1): same folding for b. */
163 cy = mpn_add (xp + n, b0, n, b1, bn - n);
164 MPN_INCR_U (xp + n, n, cy);
/* Recursive half-size product: {rp, n} = am1 * bm1 mod (B^n - 1). */
184 mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so);
/* ap1 = a mod (B^n + 1): subtract the high limbs (B^n == -1 here);
   a borrow is compensated by adding back B^n + 1 via the INCR. */
192 if (LIKELY (an > n)) {
194 cy = mpn_sub (sp1, a0, n, a1, an - n);
196 MPN_INCR_U (sp1, n + 1, cy);
/* bp1 = b mod (B^n + 1): same treatment for b. */
203 if (LIKELY (bn > n)) {
205 cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n);
207 MPN_INCR_U (sp1 + n + 1, n + 1, cy);
/* Multiply mod B^n + 1: pick the FFT when n is large enough and has
   a suitable power-of-two factor, else plain mul + fold, else the
   dedicated basecase. */
214 if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD))
219 k = mpn_fft_best_k (n, 0);
/* Decrease k while low bits of n are set -- presumably mask is
   (1<<k)-1 (declared in elided lines), so this enforces 2^k | n as
   mpn_mul_fft requires.  TODO confirm against full source. */
221 while (n & mask) {k--; mask >>=1;};
223 if (k >= FFT_FIRST_K)
224 xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k);
225 else if (UNLIKELY (bp1 == b0))
/* Plain product followed by an explicit fold mod B^n + 1. */
227 ASSERT (anp + bnp <= 2*n+1);
228 ASSERT (anp + bnp > n);
230 mpn_mul (xp, ap1, anp, bp1, bnp);
232 ASSERT (anp <= n || xp[2*n]==0);
234 cy = mpn_sub (xp, xp, n, xp + n, anp);
236 MPN_INCR_U (xp, n+1, cy);
239 mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp);
242 /* Here the CRT recomposition begins.
244 xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1)
245 Division by 2 is a bitwise rotation.
247 Assumes xp normalised mod (B^n+1).
249 The residue class [0] is represented by [B^n-1]; except when
253 #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc
254 #if HAVE_NATIVE_mpn_rsh1add_nc
255 cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */
256 hi = cy << (GMP_NUMB_BITS - 1);
258 /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi
259 overflows, i.e. a further increment will not overflow again. */
261 cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */
262 hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
264 /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that
265 the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */
267 #if GMP_NAIL_BITS == 0
268 add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi);
270 cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1);
273 #else /* ! HAVE_NATIVE_mpn_rsh1add_n */
274 #if HAVE_NATIVE_mpn_add_nc
275 cy = mpn_add_nc(rp, rp, xp, n, xp[n]);
277 cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */
/* Generic path: add, then rotate right by one bit; the shifted-out
   low bit (carried in cy) re-enters at the top through hi. */
280 mpn_rshift(rp, rp, n, 1);
282 hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */
284 /* We can have cy != 0 only if hi = 0... */
285 ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0);
287 /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */
290 /* Next increment can not overflow, read the previous comments about cy. */
291 ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0));
292 MPN_INCR_U(rp, n, cy);
294 /* Compute the highest half:
295 ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n
297 if (UNLIKELY (an + bn < rn))
299 /* Note that in this case, the only way the result can equal
300 zero mod B^{rn} - 1 is if one of the inputs is zero, and
301 then the output of both the recursive calls and this CRT
302 reconstruction is zero, not B^{rn} - 1. Which is good,
303 since the latter representation doesn't fit in the output
305 cy = mpn_sub_n (rp + n, rp, xp, an + bn - n);
307 /* FIXME: This subtraction of the high parts is not really
308 necessary, we do it to get the carry out, and for sanity
310 cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n,
311 xp + an + bn - n, rn - (an + bn), cy);
312 ASSERT (an + bn == rn - 1 ||
313 mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn)));
314 cy = mpn_sub_1 (rp, rp, an + bn, cy);
315 ASSERT (cy == (xp + an + bn - n)[0]);
/* Full-width case: high half = low half - xp; the B^n-weighted
   borrow wraps around, so it is folded back by decrementing the
   low limbs. */
319 cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n);
320 /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO.
321 DECR will affect _at most_ the lowest n limbs. */
322 MPN_DECR_U (rp, 2*n, cy);
/* Returns a size >= n that mpn_mulmod_bnm1 handles efficiently: small
   sizes pass through unchanged, larger ones are rounded up so the
   recursion (and eventually the FFT) can split evenly.
   NOTE(review): elided extract -- the return type line, braces and
   the declaration/initialisation of nh are not visible here. */
334 mpn_mulmod_bnm1_next_size (mp_size_t n)
/* Below the threshold the basecase handles any size: no rounding. */
338 if (BELOW_THRESHOLD (n, MULMOD_BNM1_THRESHOLD))
/* One or two levels of recursion: round n up to a multiple of 2
   (resp. 4) so each halving yields an even split. */
340 if (BELOW_THRESHOLD (n, 4 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
341 return (n + (2-1)) & (-2);
342 if (BELOW_THRESHOLD (n, 8 * (MULMOD_BNM1_THRESHOLD - 1) + 1))
343 return (n + (4-1)) & (-4);
/* nh is presumably about n/4 rounded up, set in elided lines --
   TODO confirm against the full source. */
347 if (BELOW_THRESHOLD (nh, MUL_FFT_MODF_THRESHOLD))
348 return (n + (8-1)) & (-8);
/* FFT range: the half size must be acceptable to mpn_mul_fft. */
350 return 2 * mpn_fft_next_size (nh, mpn_fft_best_k (nh, 0));