stdlib/mul_n.c

   1 /* mpn_mul_n -- Multiply two natural numbers of length n.
   2
   3 Copyright (C) 1991-2013 Free Software Foundation, Inc.
   4
   5 This file is part of the GNU MP Library.
   6
   7 The GNU MP Library is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU Lesser General Public License as published by
   9 the Free Software Foundation; either version 2.1 of the License, or (at your
  10 option) any later version.
  11
  12 The GNU MP Library is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  15 License for more details.
  16
  17 You should have received a copy of the GNU Lesser General Public License
  18 along with the GNU MP Library; see the file COPYING.LIB.  If not, see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include <gmp.h>
  22 #include "gmp-impl.h"
  23
  24 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
  25    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
  26    always stored.  Return the most significant limb.
  27
  28    Argument constraints:
  29    1. PRODP != UP and PRODP != VP, i.e. the destination
  30       must be distinct from the multiplier and the multiplicand.  */
  31
  32 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
  33    value which is good on most machines.  */
  34 #ifndef KARATSUBA_THRESHOLD
  35 #define KARATSUBA_THRESHOLD 32
  36 #endif
  37
  38 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
  39 #if KARATSUBA_THRESHOLD < 2
  40 #undef KARATSUBA_THRESHOLD
  41 #define KARATSUBA_THRESHOLD 2
  42 #endif
  43
  44 /* Handle simple cases with traditional multiplication.
  45
  46    This is the most critical code of multiplication.  All multiplies rely
  47    on this, both small and huge.  Small ones arrive here immediately.  Huge
  48    ones arrive here as this is the base case for Karatsuba's recursive
  49    algorithm below.  */
  50
  51 void
  52 #if __STDC__
  53 impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
  54 #else
  55 impn_mul_n_basecase (prodp, up, vp, size)
  56      mp_ptr prodp;
  57      mp_srcptr up;
  58      mp_srcptr vp;
  59      mp_size_t size;
  60 #endif
  61 {
  62   mp_size_t i;
  63   mp_limb_t cy_limb;
  64   mp_limb_t v_limb;
  65
  66   /* Multiply by the first limb in V separately, as the result can be
  67      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  68   v_limb = vp[0];
  69   if (v_limb <= 1)
  70     {
  71       if (v_limb == 1)
  72         MPN_COPY (prodp, up, size);
  73       else
  74         MPN_ZERO (prodp, size);
  75       cy_limb = 0;
  76     }
  77   else
  78     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
  79
  80   prodp[size] = cy_limb;
  81   prodp++;
  82
  83   /* For each iteration in the outer loop, multiply one limb from
  84      U with one limb from V, and add it to PROD.  */
  85   for (i = 1; i < size; i++)
  86     {
  87       v_limb = vp[i];
  88       if (v_limb <= 1)
  89         {
  90           cy_limb = 0;
  91           if (v_limb == 1)
  92             cy_limb = mpn_add_n (prodp, prodp, up, size);
  93         }
  94       else
  95         cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
  96
  97       prodp[size] = cy_limb;
  98       prodp++;
  99     }
 100 }
 101
 102 void
 103 #if __STDC__
 104 impn_mul_n (mp_ptr prodp,
 105              mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
 106 #else
 107 impn_mul_n (prodp, up, vp, size, tspace)
 108      mp_ptr prodp;
 109      mp_srcptr up;
 110      mp_srcptr vp;
 111      mp_size_t size;
 112      mp_ptr tspace;
 113 #endif
 114 {
 115   if ((size & 1) != 0)
 116     {
 117       /* The size is odd, the code code below doesn't handle that.
 118          Multiply the least significant (size - 1) limbs with a recursive
 119          call, and handle the most significant limb of S1 and S2
 120          separately.  */
 121       /* A slightly faster way to do this would be to make the Karatsuba
 122          code below behave as if the size were even, and let it check for
 123          odd size in the end.  I.e., in essence move this code to the end.
 124          Doing so would save us a recursive call, and potentially make the
 125          stack grow a lot less.  */
 126
 127       mp_size_t esize = size - 1;       /* even size */
 128       mp_limb_t cy_limb;
 129
 130       MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
 131       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
 132       prodp[esize + esize] = cy_limb;
 133       cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
 134
 135       prodp[esize + size] = cy_limb;
 136     }
 137   else
 138     {
 139       /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
 140
 141          Split U in two pieces, U1 and U0, such that
 142          U = U0 + U1*(B**n),
 143          and V in V1 and V0, such that
 144          V = V0 + V1*(B**n).
 145
 146          UV is then computed recursively using the identity
 147
 148                 2n   n          n                     n
 149          UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
 150                         1 1        1  0   0  1              0 0
 151
 152          Where B = 2**BITS_PER_MP_LIMB.  */
 153
 154       mp_size_t hsize = size >> 1;
 155       mp_limb_t cy;
 156       int negflg;
 157
 158       /*** Product H.    ________________  ________________
 159                         |_____U1 x V1____||____U0 x V0_____|  */
 160       /* Put result in upper part of PROD and pass low part of TSPACE
 161          as new TSPACE.  */
 162       MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
 163
 164       /*** Product M.    ________________
 165                         |_(U1-U0)(V0-V1)_|  */
 166       if (mpn_cmp (up + hsize, up, hsize) >= 0)
 167         {
 168           mpn_sub_n (prodp, up + hsize, up, hsize);
 169           negflg = 0;
 170         }
 171       else
 172         {
 173           mpn_sub_n (prodp, up, up + hsize, hsize);
 174           negflg = 1;
 175         }
 176       if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
 177         {
 178           mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
 179           negflg ^= 1;
 180         }
 181       else
 182         {
 183           mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
 184           /* No change of NEGFLG.  */
 185         }
 186       /* Read temporary operands from low part of PROD.
 187          Put result in low part of TSPACE using upper part of TSPACE
 188          as new TSPACE.  */
 189       MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
 190
 191       /*** Add/copy product H.  */
 192       MPN_COPY (prodp + hsize, prodp + size, hsize);
 193       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
 194
 195       /*** Add product M (if NEGFLG M is a negative number).  */
 196       if (negflg)
 197         cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
 198       else
 199         cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 200
 201       /*** Product L.    ________________  ________________
 202                         |________________||____U0 x V0_____|  */
 203       /* Read temporary operands from low part of PROD.
 204          Put result in low part of TSPACE using upper part of TSPACE
 205          as new TSPACE.  */
 206       MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
 207
 208       /*** Add/copy Product L (twice).  */
 209
 210       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 211       if (cy)
 212         mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
 213
 214       MPN_COPY (prodp, tspace, hsize);
 215       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 216       if (cy)
 217         mpn_add_1 (prodp + size, prodp + size, size, 1);
 218     }
 219 }
 220
 221 void
 222 #if __STDC__
 223 impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
 224 #else
 225 impn_sqr_n_basecase (prodp, up, size)
 226      mp_ptr prodp;
 227      mp_srcptr up;
 228      mp_size_t size;
 229 #endif
 230 {
 231   mp_size_t i;
 232   mp_limb_t cy_limb;
 233   mp_limb_t v_limb;
 234
 235   /* Multiply by the first limb in V separately, as the result can be
 236      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
 237   v_limb = up[0];
 238   if (v_limb <= 1)
 239     {
 240       if (v_limb == 1)
 241         MPN_COPY (prodp, up, size);
 242       else
 243         MPN_ZERO (prodp, size);
 244       cy_limb = 0;
 245     }
 246   else
 247     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
 248
 249   prodp[size] = cy_limb;
 250   prodp++;
 251
 252   /* For each iteration in the outer loop, multiply one limb from
 253      U with one limb from V, and add it to PROD.  */
 254   for (i = 1; i < size; i++)
 255     {
 256       v_limb = up[i];
 257       if (v_limb <= 1)
 258         {
 259           cy_limb = 0;
 260           if (v_limb == 1)
 261             cy_limb = mpn_add_n (prodp, prodp, up, size);
 262         }
 263       else
 264         cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
 265
 266       prodp[size] = cy_limb;
 267       prodp++;
 268     }
 269 }
 270
 271 void
 272 #if __STDC__
 273 impn_sqr_n (mp_ptr prodp,
 274              mp_srcptr up, mp_size_t size, mp_ptr tspace)
 275 #else
 276 impn_sqr_n (prodp, up, size, tspace)
 277      mp_ptr prodp;
 278      mp_srcptr up;
 279      mp_size_t size;
 280      mp_ptr tspace;
 281 #endif
 282 {
 283   if ((size & 1) != 0)
 284     {
 285       /* The size is odd, the code code below doesn't handle that.
 286          Multiply the least significant (size - 1) limbs with a recursive
 287          call, and handle the most significant limb of S1 and S2
 288          separately.  */
 289       /* A slightly faster way to do this would be to make the Karatsuba
 290          code below behave as if the size were even, and let it check for
 291          odd size in the end.  I.e., in essence move this code to the end.
 292          Doing so would save us a recursive call, and potentially make the
 293          stack grow a lot less.  */
 294
 295       mp_size_t esize = size - 1;       /* even size */
 296       mp_limb_t cy_limb;
 297
 298       MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
 299       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
 300       prodp[esize + esize] = cy_limb;
 301       cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
 302
 303       prodp[esize + size] = cy_limb;
 304     }
 305   else
 306     {
 307       mp_size_t hsize = size >> 1;
 308       mp_limb_t cy;
 309
 310       /*** Product H.    ________________  ________________
 311                         |_____U1 x U1____||____U0 x U0_____|  */
 312       /* Put result in upper part of PROD and pass low part of TSPACE
 313          as new TSPACE.  */
 314       MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
 315
 316       /*** Product M.    ________________
 317                         |_(U1-U0)(U0-U1)_|  */
 318       if (mpn_cmp (up + hsize, up, hsize) >= 0)
 319         {
 320           mpn_sub_n (prodp, up + hsize, up, hsize);
 321         }
 322       else
 323         {
 324           mpn_sub_n (prodp, up, up + hsize, hsize);
 325         }
 326
 327       /* Read temporary operands from low part of PROD.
 328          Put result in low part of TSPACE using upper part of TSPACE
 329          as new TSPACE.  */
 330       MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
 331
 332       /*** Add/copy product H.  */
 333       MPN_COPY (prodp + hsize, prodp + size, hsize);
 334       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
 335
 336       /*** Add product M (if NEGFLG M is a negative number).  */
 337       cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
 338
 339       /*** Product L.    ________________  ________________
 340                         |________________||____U0 x U0_____|  */
 341       /* Read temporary operands from low part of PROD.
 342          Put result in low part of TSPACE using upper part of TSPACE
 343          as new TSPACE.  */
 344       MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
 345
 346       /*** Add/copy Product L (twice).  */
 347
 348       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
 349       if (cy)
 350         mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
 351
 352       MPN_COPY (prodp, tspace, hsize);
 353       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
 354       if (cy)
 355         mpn_add_1 (prodp + size, prodp + size, size, 1);
 356     }
 357 }
 358
 359 /* This should be made into an inline function in gmp.h.  */
 360 void
 361 #if __STDC__
 362 mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
 363 #else
 364 mpn_mul_n (prodp, up, vp, size)
 365      mp_ptr prodp;
 366      mp_srcptr up;
 367      mp_srcptr vp;
 368      mp_size_t size;
 369 #endif
 370 {
 371   TMP_DECL (marker);
 372   TMP_MARK (marker);
 373   if (up == vp)
 374     {
 375       if (size < KARATSUBA_THRESHOLD)
 376         {
 377           impn_sqr_n_basecase (prodp, up, size);
 378         }
 379       else
 380         {
 381           mp_ptr tspace;
 382           tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
 383           impn_sqr_n (prodp, up, size, tspace);
 384         }
 385     }
 386   else
 387     {
 388       if (size < KARATSUBA_THRESHOLD)
 389         {
 390           impn_mul_n_basecase (prodp, up, vp, size);
 391         }
 392       else
 393         {
 394           mp_ptr tspace;
 395           tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
 396           impn_mul_n (prodp, up, vp, size, tspace);
 397         }
 398     }
 399   TMP_FREE (marker);
 400 }