/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it.  */
#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h>  /* for _popcnt */
#endif
/* limits.h is not used in general, since it's an ANSI-ism, and since on
   solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values).

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort.  */

#if defined _CRAY
#include <limits.h>
#endif
/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmp_cpuvec,
   they're not called directly.  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_gcd_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)
#if ! __GMP_WITHIN_CONFIGURE
#include "config.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif
#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif
#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif
/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.  */
#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif
174 /* if not provided by gmp-mparam.h */
175 #ifndef BYTES_PER_MP_LIMB
176 #define BYTES_PER_MP_LIMB SIZEOF_MP_LIMB_T
178 #define GMP_LIMB_BYTES BYTES_PER_MP_LIMB
179 #ifndef GMP_LIMB_BITS
180 #define GMP_LIMB_BITS (8 * SIZEOF_MP_LIMB_T)
183 #define BITS_PER_ULONG (8 * SIZEOF_UNSIGNED_LONG)
186 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
187 #if HAVE_UINT_LEAST32_T
188 typedef uint_least32_t gmp_uint_least32_t;
190 #if SIZEOF_UNSIGNED_SHORT >= 4
191 typedef unsigned short gmp_uint_least32_t;
193 #if SIZEOF_UNSIGNED >= 4
194 typedef unsigned gmp_uint_least32_t;
196 typedef unsigned long gmp_uint_least32_t;
/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif
/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h.  */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif
222 /* "const" basically means a function does nothing but examine its arguments
223 and give a return value, it doesn't read or write any memory (neither
224 global nor pointed to by arguments), and has no other side-effects. This
225 is more restrictive than "pure". See info node "(gcc)Function
226 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
227 this off when trying to write timing loops. */
228 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
229 #define ATTRIBUTE_CONST __attribute__ ((const))
231 #define ATTRIBUTE_CONST
#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif
240 /* "malloc" means a function behaves like malloc in that the pointer it
241 returns doesn't alias anything. */
242 #if HAVE_ATTRIBUTE_MALLOC
243 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
245 #define ATTRIBUTE_MALLOC
#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif
#if ! HAVE_MEMSET
#define memset(p, c, n)                 \
  do {                                  \
    ASSERT ((n) >= 0);                  \
    char  *__memset__p = (p);           \
    int   __i;                          \
    for (__i = 0; __i < (n); __i++)     \
      __memset__p[__i] = (c);           \
  } while (0)
#endif
/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif
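/* Example (an illustrative sketch, not part of the library): a varargs
   routine that must traverse its argument list twice, which is exactly the
   situation va_copy exists for.  */
#if 0
#include <stdarg.h>
static int
example_sum_twice (int count, ...)
{
  va_list  ap, ap2;
  int      i, total = 0;
  va_start (ap, count);
  va_copy (ap2, ap);    /* the second traversal needs its own va_list */
  for (i = 0; i < count; i++)
    total += va_arg (ap, int);
  for (i = 0; i < count; i++)
    total += va_arg (ap2, int);
  va_end (ap2);
  va_end (ap);
  return total;
}
#endif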
/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).  */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif
#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */
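/* Example (an illustrative sketch): the usage pattern above in a function
   wanting n temporary limbs; TMP_ALLOC_LIMBS is defined further down in
   this file.  */
#if 0
static void
example_tmp_user (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_ptr  tp;
  TMP_DECL;                  /* might be empty, so last among declarations */
  TMP_MARK;                  /* must precede any TMP_ALLOC */
  tp = TMP_ALLOC_LIMBS (n);  /* n must be non-zero */
  MPN_COPY (tp, up, n);      /* ... work in {tp,n} ... */
  MPN_COPY (rp, tp, n);
  TMP_FREE;                  /* releases everything since TMP_MARK */
}
#endif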
/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)
314 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
315 "a" must be an unsigned type.
316 This is designed for use with a compile-time constant "m".
317 The POW2 case is expected to be usual, and gcc 3.0 and up recognises
318 "(-(8*n))%8" or the like is always zero, which means the rounding up in
319 the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
320 #define ROUND_UP_MULTIPLE(a,m) \
321 (POW2_P(m) ? (a) + (-(a))%(m) \
322 : (a)+(m)-1 - (((a)+(m)-1) % (m)))
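/* For instance, ROUND_UP_MULTIPLE(13,8) goes through the POW2 branch as
   13 + (-13)%8 = 13 + 3 = 16 (unsigned arithmetic makes (-13)%8 well
   defined), and ROUND_UP_MULTIPLE(13,6) through the general branch as
   13+6-1 - ((13+6-1) % 6) = 18 - 0 = 18.  */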
#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
#endif
#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL           struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK           __tmp_marker = 0
#define TMP_SALLOC(n)      alloca(n)
#define TMP_BALLOC(n)      __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_ALLOC(n)                                                    \
  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                                           \
  do {                                                                     \
    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif
#if WANT_TMP_REENTRANT
#define TMP_SDECL          TMP_DECL
#define TMP_DECL           struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK          TMP_MARK
#define TMP_MARK           __tmp_marker = 0
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(n)       __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE          TMP_FREE
#define TMP_FREE           __gmp_tmp_reentrant_free (__tmp_marker)
#endif
#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
__GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
#define TMP_SDECL          TMP_DECL
#define TMP_DECL           struct tmp_marker __tmp_marker
#define TMP_SMARK          TMP_MARK
#define TMP_MARK           __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(n)                                                    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE          TMP_FREE
#define TMP_FREE           __gmp_tmp_free (&__tmp_marker)
#endif
#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  void                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
                                                         struct tmp_debug_t *,
                                                         const char *, const char *));
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
                                                         struct tmp_debug_t **, const char *,
                                                         size_t)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
                                                         struct tmp_debug_t **,
                                                         const char *, const char *));
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(size)                                         \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,              \
                         __tmp_marker_inscope,                  \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif /* WANT_TMP_DEBUG */
/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)   ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type)  ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type)  ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)       TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)      TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)      TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)     TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)    TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)    TMP_BALLOC_TYPE(n,mp_ptr)
/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)
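/* Example (an illustrative fragment): scratch for a 2*n limb product and
   an n+1 limb shifted copy obtained in one allocation.  */
#if 0
  mp_ptr  prodp, shiftp;
  TMP_ALLOC_LIMBS_2 (prodp, 2 * n,
                     shiftp, n + 1);
#endif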
/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
/* Field access macros.  */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define LIMBS(x)  ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)
/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
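/* For example, n=8 is 1000 binary and n-1=0111, so 8&7 is 0 and POW2_P(8)
   is 1; n=12 is 1100 and n-1=1011, so 12&11 is 1000, non-zero, and
   POW2_P(12) is 0.  POW2_P(0) is 1, per the caveat above.  */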
502 /* The "short" defines are a bit different because shorts are promoted to
505 #ifndef's are used since on some systems (HP?) header files other than
506 limits.h setup these defines. We could forcibly #undef in that case, but
507 there seems no need to worry about that. */
510 #define ULONG_MAX __GMP_ULONG_MAX
513 #define UINT_MAX __GMP_UINT_MAX
516 #define USHRT_MAX __GMP_USHRT_MAX
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
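/* For instance with 32-bit limbs and no nails, MP_LIMB_T_MAX is 0xFFFFFFFF,
   MP_LIMB_T_MAX >> 1 is 0x7FFFFFFF, and the xor leaves GMP_LIMB_HIGHBIT =
   0x80000000, the top bit alone.  */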
#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif
#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN
#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif
#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD    MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1        0              /* no preinv */
#define DIVREM_2_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD          MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD          MP_SIZE_T_MAX
#endif
#define MP_LIMB_T_SWAP(x, y)                            \
  do {                                                  \
    mp_limb_t __mp_limb_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_limb_t_swap__tmp;                        \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                            \
  do {                                                  \
    mp_size_t __mp_size_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_size_t_swap__tmp;                        \
  } while (0)

#define MP_PTR_SWAP(x, y)                               \
  do {                                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);                    \
    (x) = (y);                                          \
    (y) = __mp_ptr_swap__tmp;                           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                            \
  do {                                                  \
    mp_srcptr __mp_srcptr_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_srcptr_swap__tmp;                        \
  } while (0)
#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
  do {                                  \
    MP_PTR_SWAP (xp, yp);               \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
  do {                                  \
    MP_SRCPTR_SWAP (xp, yp);            \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while (0)
#define MPZ_PTR_SWAP(x, y)                              \
  do {                                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);                  \
    (x) = (y);                                          \
    (y) = __mpz_ptr_swap__tmp;                          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                           \
  do {                                                  \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);            \
    (x) = (y);                                          \
    (y) = __mpz_srcptr_swap__tmp;                       \
  } while (0)
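/* Example (an illustrative fragment): the common pattern of ordering two
   operands by size before a multiply-style routine.  */
#if 0
  if (usize < vsize)
    MPN_SRCPTR_SWAP (up,usize, vp,vsize);   /* now usize >= vsize */
#endif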
/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));

__GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
__GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
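/* Example (an illustrative fragment): a heap limb buffer through the
   user-settable memory functions, freed with the matching size.  */
#if 0
  mp_ptr  tp = __GMP_ALLOCATE_FUNC_LIMBS (n);
  /* ... use {tp,n} ... */
  __GMP_FREE_FUNC_LIMBS (tp, n);   /* size must match the allocation */
#endif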
#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)
/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif
/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */
#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif
/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif
/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif
__GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
   returns the carry out (0, 1 or 2).  */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
   returns the carry out (0, ..., 4).  */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
   returns the carry out (0, ..., 2^k).  */
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
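/* A generic equivalent (an illustrative sketch, not the optimized native
   code): mpn_addlsh1_n can be built from mpn_lshift and mpn_add_n,
   accumulating the two carries.  */
#if 0
static mp_limb_t
example_addlsh1_n (mp_ptr cp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_limb_t  cy;
  cy = mpn_lshift (cp, bp, n, 1);    /* cp = 2*{bp,n}, shifted-out bit 0 or 1 */
  cy += mpn_add_n (cp, cp, ap, n);   /* cp += {ap,n}, total carry 0..2 */
  return cy;
}
#endif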
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
   returns the borrow out (0, 1 or 2).  */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
   returns the carry out (-1, 0, 1).  */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
   returns the borrow out (FIXME 0, 1, 2 or 3).  */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
   returns the carry out (-1, ..., 3).  */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
   returns the carry out (-1, 0, ..., 2^k-1).  */
#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  */
#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
   complement negative.  */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
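/* A generic equivalent (an illustrative sketch): mpn_rsh1add_n is an add
   followed by a one bit right shift, the add's carry becoming the new high
   bit.  */
#if 0
static mp_limb_t
example_rsh1add_n (mp_ptr cp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_limb_t  cy, out;
  cy = mpn_add_n (cp, ap, bp, n);            /* carry is 0 or 1 */
  out = mpn_rshift (cp, cp, n, 1);           /* shifted-out bit, at the high end */
  cp[n - 1] |= cy << (GMP_NUMB_BITS - 1);    /* carry becomes the top bit */
  return out != 0;
}
#endif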
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
/* Remap names of internal mpn functions. */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#endif
#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_sqr __MPN(sqr)
__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;
#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}
#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}
typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure.  */
typedef struct {
  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
} gmp_randfnptr_t;
/* Macro to obtain a void pointer to the function pointers structure. */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
/* Write a given number of random bits to rp.  */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr  __rstate = (state);                      \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
      (__rstate, rp, bits);                                     \
  } while (0)
__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */
__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                       \
  ((__gmp_rands_initialized ? 0                     \
    : (__gmp_rands_initialized = 1,                 \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
   __gmp_rands)
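/* Example (an illustrative fragment): how internal code draws random bits
   through RANDS and _gmp_rand.  */
#if 0
  mp_limb_t  buf[2];
  _gmp_rand (buf, RANDS, 2 * GMP_NUMB_BITS);   /* fill both limbs */
#endif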
/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)
/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
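/* Example (an illustrative sketch): the typical dispatch written with
   these macros; with a compile-time constant threshold the dead branch
   drops out.  MUL_TOOM22_THRESHOLD comes from gmp-mparam.h.  */
#if 0
  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    mpn_mul_basecase (prodp, up, n, vp, n);
  else
    {
      /* ... Toom-22 or larger ... */
    }
#endif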
#define MPN_TOOM22_MUL_MINSIZE    4
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
#define MPN_TOOM53_MUL_MINSIZE   49 /* ??? */
#define MPN_TOOM63_MUL_MINSIZE   49
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));

#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));

#define mpn_mul_fft_full __MPN(mul_fft_full)
__GMP_DECLSPEC void mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
__GMP_DECLSPEC void mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));

#define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));

#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
#define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));

#define mpn_mu_div_qr __MPN(mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
#define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_mu_divappr_q __MPN(mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
#define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_mu_div_q __MPN(mu_div_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_div_q __MPN(div_q)
__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invert_itch(n)  mpn_invertappr_itch(n)

#define mpn_ni_invertappr __MPN(ni_invertappr)
__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr __MPN(invertappr)
__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr_itch(n)  (3 * (n) + 2)

#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_binvert_itch __MPN(binvert_itch)
__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));

#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));

#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
__GMP_DECLSPEC void mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));

#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
#define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));

#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
__GMP_DECLSPEC void mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_qr __MPN(bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_q __MPN(bdiv_q)
__GMP_DECLSPEC void mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_divexact __MPN(divexact)
__GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#define mpn_divexact_itch __MPN(divexact_itch)
__GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
#define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
#define mpn_bdiv_dbm1(dst, src, size, divisor) \
  mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
#define mpn_powm __MPN(powm)
__GMP_DECLSPEC void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powlo __MPN(powlo)
__GMP_DECLSPEC void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
#define mpn_powm_sec __MPN(powm_sec)
__GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powm_sec_itch __MPN(powm_sec_itch)
__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
#define mpn_subcnd_n __MPN(subcnd_n)
__GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_tabselect __MPN(tabselect)
__GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
#define mpn_redc_1_sec __MPN(redc_1_sec)
__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#ifndef DIVEXACT_BY3_METHOD
#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
#define DIVEXACT_BY3_METHOD 0  /* default to using mpn_bdiv_dbm1c */
#else
#define DIVEXACT_BY3_METHOD 1
#endif
#endif
#if DIVEXACT_BY3_METHOD == 0
#undef mpn_divexact_by3
#define mpn_divexact_by3(dst,src,size) \
  (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
/* override mpn_divexact_by3c defined in gmp.h */
#undef mpn_divexact_by3c
#define mpn_divexact_by3c(dst,src,size,cy) \
  (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * cy))
#endif
#if GMP_NUMB_BITS % 4 == 0
#define mpn_divexact_by5(dst,src,size) \
  (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
#endif

#if GMP_NUMB_BITS % 6 == 0
#define mpn_divexact_by7(dst,src,size) \
  (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
#endif

#if GMP_NUMB_BITS % 6 == 0
#define mpn_divexact_by9(dst,src,size) \
  (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
#endif

#if GMP_NUMB_BITS % 10 == 0
#define mpn_divexact_by11(dst,src,size) \
  (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
#endif

#if GMP_NUMB_BITS % 12 == 0
#define mpn_divexact_by13(dst,src,size) \
  (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
#endif

#if GMP_NUMB_BITS % 4 == 0
#define mpn_divexact_by15(dst,src,size) \
  (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
#endif
#define mpz_divexact_gcd __gmpz_divexact_gcd
__GMP_DECLSPEC void mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
__GMP_DECLSPEC size_t mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
#endif
#define mpn_divisible_p __MPN(divisible_p)
__GMP_DECLSPEC int mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __MPN(rootrem)
__GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                              \
  do {                                                          \
    int __i;  /* Faster on some Crays with plain int */         \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = 0; __i < (n); __i++)                             \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif
/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
#define mpn_copyi __MPN(copyi)
__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    mpn_copyi (dst, src, size);                         \
  } while (0)
#endif
/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst);                           \
        mp_srcptr __src = (src);                        \
        mp_limb_t __x;                                  \
        __x = *__src++;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst++ = __x;                         \
                __x = *__src++;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst++ = __x;                                 \
      }                                                 \
  } while (0)
#endif
#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                              \
  do {                                                          \
    int __i;  /* Faster on some Crays with plain int */         \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = (n) - 1; __i >= 0; __i--)                        \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif
/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
#define mpn_copyd __MPN(copyd)
__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    mpn_copyd (dst, src, size);                         \
  } while (0)
#endif
1457 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1458 #if ! defined (MPN_COPY_DECR)
1459 #define MPN_COPY_DECR(dst, src, n) \
1461 ASSERT ((n) >= 0); \
1462 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
1465 mp_size_t __n = (n) - 1; \
1466 mp_ptr __dst = (dst) + __n; \
1467 mp_srcptr __src = (src) + __n; \
1486 #define MPN_COPY(d,s,n) \
1488 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
1489 MPN_COPY_INCR (d, s, n); \
1494 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1495 #define MPN_REVERSE(dst, src, size) \
1497 mp_ptr __dst = (dst); \
1498 mp_size_t __size = (size); \
1499 mp_srcptr __src = (src) + __size - 1; \
1501 ASSERT ((size) >= 0); \
1502 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
1503 CRAY_Pragma ("_CRI ivdep"); \
1504 for (__i = 0; __i < __size; __i++) \
1513 /* Zero n limbs at dst.
1515 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
1516 ppc630 for instance this is optimal since it can sustain only 1 store per
1517 cycle.
1519 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1520 "for" loop in the generic code below can become stu/bdnz. The do/while
1521 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1522 applies here as to __GMPN_COPY_INCR in gmp.h.
1524 xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1525 this loop too.
1527 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1528 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1529 trouble than it's worth to do the same, though perhaps a call to memset
1530 would be good when on a GNU system. */
1532 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1533 #define MPN_ZERO(dst, n) \
1535 ASSERT ((n) >= 0); \
1538 mp_ptr __dst = (dst) - 1; \
1539 mp_size_t __n = (n); \
1548 #define MPN_ZERO(dst, n) \
1550 ASSERT ((n) >= 0); \
1553 mp_ptr __dst = (dst); \
1554 mp_size_t __n = (n); \
1563 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1564 start up and would need to strip a lot of zeros before it'd be faster
1565 than a simple cmpl loop. Here are some times in cycles for
1566 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1575 #ifndef MPN_NORMALIZE
1576 #define MPN_NORMALIZE(DST, NLIMBS) \
1578 while ((NLIMBS) > 0) \
1580 if ((DST)[(NLIMBS) - 1] != 0) \
1586 #ifndef MPN_NORMALIZE_NOT_ZERO
1587 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
1589 ASSERT ((NLIMBS) >= 1); \
1592 if ((DST)[(NLIMBS) - 1] != 0) \
1599 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1600 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1601 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1602 somewhere a non-zero limb. */
1603 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
1605 ASSERT ((size) >= 1); \
1606 ASSERT ((low) == (ptr)[0]); \
1608 while ((low) == 0) \
1611 ASSERT ((size) >= 1); \
1617 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1618 temporary variable; it will be automatically cleared out at function
1619 return. We use __x here to make it possible to accept both mpz_ptr and
1620 mpz_t arguments. */
1621 #define MPZ_TMP_INIT(X, NLIMBS) \
1623 mpz_ptr __x = (X); \
1624 ASSERT ((NLIMBS) >= 1); \
1625 __x->_mp_alloc = (NLIMBS); \
1626 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
1629 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
1630 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
1631 ? (mp_ptr) _mpz_realloc(z,n) \
1632 : PTR(z))
1634 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
1637 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1638 f1p.
1640 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1641 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
1642 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1644 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1645 without good floating point. There's +2 for rounding up, and a further
1646 +2 since at the last step x limbs are doubled into a 2x+1 limb region
1647 whereas the actual F[2k] value might be only 2x-1 limbs.
1649 Note that a division is done first, since on a 32-bit system it's at
1650 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
1651 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1652 whatever a multiply of two 190Mbyte numbers takes.)
1654 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1655 worked into the multiplier. */
1657 #define MPN_FIB2_SIZE(n) \
1658 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
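/* Worked instance (illustrative): with GMP_NUMB_BITS == 32 and n == 100,
   MPN_FIB2_SIZE(100) = 100/32*23/32 + 4 = 3*23/32 + 4 = 2 + 4 = 6 limbs,
   comfortably above the 100*0.6942419 ~= 69 bits ~= 3 limbs that F[100]
   itself occupies, leaving the +2+2 headroom described above. */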
1661 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
1662 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1664 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1665 F[n] + 2*F[n-1] fits in a limb. */
1667 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1668 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
1670 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
1671 typedef struct
1672 {
1673 unsigned long d; /* current index in s[] */
1674 unsigned long s0; /* number corresponding to s[0] */
1675 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */
1676 unsigned char s[SIEVESIZE + 1]; /* sieve table */
1677 } gmp_primesieve_t;
1679 #define gmp_init_primesieve __gmp_init_primesieve
1680 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
1682 #define gmp_nextprime __gmp_nextprime
1683 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
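/* A minimal usage sketch (illustrative, using only the declarations above):
   gmp_init_primesieve resets the sieve, and gmp_nextprime hands back
   successive primes on each call. */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <stdio.h>
void
print_some_primes (void)
{
  gmp_primesieve_t ps;
  int i;
  gmp_init_primesieve (&ps);
  for (i = 0; i < 10; i++)
    printf ("%lu\n", gmp_nextprime (&ps));
}
#endif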
1686 #ifndef MUL_TOOM22_THRESHOLD
1687 #define MUL_TOOM22_THRESHOLD 30
1690 #ifndef MUL_TOOM33_THRESHOLD
1691 #define MUL_TOOM33_THRESHOLD 100
1694 #ifndef MUL_TOOM44_THRESHOLD
1695 #define MUL_TOOM44_THRESHOLD 300
1698 #ifndef MUL_TOOM6H_THRESHOLD
1699 #define MUL_TOOM6H_THRESHOLD 350
1702 #ifndef SQR_TOOM6_THRESHOLD
1703 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
1706 #ifndef MUL_TOOM8H_THRESHOLD
1707 #define MUL_TOOM8H_THRESHOLD 450
1710 #ifndef SQR_TOOM8_THRESHOLD
1711 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
1714 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
1715 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
1718 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
1719 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
1722 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
1723 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100
1726 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
1727 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
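/* How these constants get consumed (an editor's sketch, not a real GMP
   routine; mpn_mul_basecase and mpn_toom22_mul are declared elsewhere in
   this file, and scratch must be sized per mpn_toom22_mul_itch): */
#if 0 /* standalone illustration */
static void
mul_n_sketch (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_ptr scratch)
{
  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    mpn_mul_basecase (rp, up, n, vp, n);         /* schoolbook below cutoff */
  else
    mpn_toom22_mul (rp, up, n, vp, n, scratch);  /* karatsuba-style above */
}
#endif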
1730 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
1731 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
1732 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
1733 separate hard limit will have been defined. Similarly for TOOM3. */
1734 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
1735 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD
1737 #ifndef MUL_TOOM33_THRESHOLD_LIMIT
1738 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD
1740 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
1741 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
1744 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
1745 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
1746 certainly always want it if there's a native assembler mpn_sqr_basecase.)
1748 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
1749 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
1750 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less
1751 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
1752 should be used, and that may be never. */
1754 #ifndef SQR_BASECASE_THRESHOLD
1755 #define SQR_BASECASE_THRESHOLD 0
1758 #ifndef SQR_TOOM2_THRESHOLD
1759 #define SQR_TOOM2_THRESHOLD 50
1762 #ifndef SQR_TOOM3_THRESHOLD
1763 #define SQR_TOOM3_THRESHOLD 120
1766 #ifndef SQR_TOOM4_THRESHOLD
1767 #define SQR_TOOM4_THRESHOLD 400
1770 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */
1771 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
1772 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
1775 #ifndef DC_DIV_QR_THRESHOLD
1776 #define DC_DIV_QR_THRESHOLD 50
1779 #ifndef DC_DIVAPPR_Q_THRESHOLD
1780 #define DC_DIVAPPR_Q_THRESHOLD 200
1783 #ifndef DC_BDIV_QR_THRESHOLD
1784 #define DC_BDIV_QR_THRESHOLD 50
1787 #ifndef DC_BDIV_Q_THRESHOLD
1788 #define DC_BDIV_Q_THRESHOLD 180
1791 #ifndef DIVEXACT_JEB_THRESHOLD
1792 #define DIVEXACT_JEB_THRESHOLD 25
1795 #ifndef INV_MULMOD_BNM1_THRESHOLD
1796 #define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD)
1799 #ifndef INV_APPR_THRESHOLD
1800 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD
1803 #ifndef INV_NEWTON_THRESHOLD
1804 #define INV_NEWTON_THRESHOLD 200
1807 #ifndef BINV_NEWTON_THRESHOLD
1808 #define BINV_NEWTON_THRESHOLD 300
1811 #ifndef MU_DIVAPPR_Q_THRESHOLD
1812 #define MU_DIVAPPR_Q_THRESHOLD 2000
1815 #ifndef MU_DIV_QR_THRESHOLD
1816 #define MU_DIV_QR_THRESHOLD 2000
1819 #ifndef MUPI_DIV_QR_THRESHOLD
1820 #define MUPI_DIV_QR_THRESHOLD 200
1823 #ifndef MU_BDIV_Q_THRESHOLD
1824 #define MU_BDIV_Q_THRESHOLD 2000
1827 #ifndef MU_BDIV_QR_THRESHOLD
1828 #define MU_BDIV_QR_THRESHOLD 2000
1831 #ifndef MULMOD_BNM1_THRESHOLD
1832 #define MULMOD_BNM1_THRESHOLD 16
1835 #ifndef SQRMOD_BNM1_THRESHOLD
1836 #define SQRMOD_BNM1_THRESHOLD 16
1839 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
1840 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2)
1843 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
1845 #ifndef REDC_1_TO_REDC_2_THRESHOLD
1846 #define REDC_1_TO_REDC_2_THRESHOLD 15
1848 #ifndef REDC_2_TO_REDC_N_THRESHOLD
1849 #define REDC_2_TO_REDC_N_THRESHOLD 100
1854 #ifndef REDC_1_TO_REDC_N_THRESHOLD
1855 #define REDC_1_TO_REDC_N_THRESHOLD 100
1858 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
1861 /* First k to use for an FFT modF multiply. A modF FFT is an order
1862 log(2^k)/log(2^(k-1)) = k/(k-1) algorithm, so k=3 is 1.5, barely better than
1863 karatsuba's log(3)/log(2) ~= 1.585, whereas k=4 is 1.33, beating toom3's log(5)/log(3) ~= 1.465. */
1864 #define FFT_FIRST_K 4
1866 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1867 #ifndef MUL_FFT_MODF_THRESHOLD
1868 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3)
1870 #ifndef SQR_FFT_MODF_THRESHOLD
1871 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
1874 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
1875 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1876 NxN->2N multiply and not recursing into itself is an order
1877 log(2^k)/log(2^(k-2)) = k/(k-2) algorithm, so it'll be at least k=7 at 1.4,
1878 which is the first better than toom3's 1.465. */
1879 #ifndef MUL_FFT_THRESHOLD
1880 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
1882 #ifndef SQR_FFT_THRESHOLD
1883 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
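/* Putting numbers to the orders quoted above (illustrative):
   modF FFT-k is k/(k-1): k=4 -> 1.33, k=5 -> 1.25, ...
   full NxN->2N FFT-k is k/(k-2): k=6 -> 1.5, k=7 -> 1.4
   against karatsuba at log(3)/log(2) ~= 1.585 and toom3 at
   log(5)/log(3) ~= 1.465, so k=7 is indeed the first full product
   cheaper than toom3. */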
1886 /* Table of thresholds for successive modF FFT "k"s. The first entry is
1887 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1888 etc. See mpn_fft_best_k(). */
1889 #ifndef MUL_FFT_TABLE
1890 #define MUL_FFT_TABLE \
1891 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
1892 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
1893 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
1894 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
1895 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
1896 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
1899 #ifndef SQR_FFT_TABLE
1900 #define SQR_FFT_TABLE \
1901 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
1902 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
1903 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
1904 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
1905 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
1906 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
1916 #ifndef FFT_TABLE_ATTRS
1917 #define FFT_TABLE_ATTRS static const
1920 #define MPN_FFT_TABLE_SIZE 16
1923 #ifndef DC_DIV_QR_THRESHOLD
1924 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD)
1927 #ifndef GET_STR_DC_THRESHOLD
1928 #define GET_STR_DC_THRESHOLD 18
1931 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
1932 #define GET_STR_PRECOMPUTE_THRESHOLD 35
1935 #ifndef SET_STR_DC_THRESHOLD
1936 #define SET_STR_DC_THRESHOLD 750
1939 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
1940 #define SET_STR_PRECOMPUTE_THRESHOLD 2000
1943 /* Return non-zero if xp,xsize and yp,ysize overlap.
1944 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1945 overlap. If both these are false, there's an overlap. */
1946 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1947 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1948 #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
1949 ( (char *) (xp) + (xsize) > (char *) (yp) \
1950 && (char *) (yp) + (ysize) > (char *) (xp))
1952 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
1953 overlapping. Return zero if they're partially overlapping. */
1954 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
1955 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
1956 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
1957 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
1959 /* Return non-zero if dst,dsize and src,ssize are either identical or
1960 overlapping in a way suitable for an incrementing/decrementing algorithm.
1961 Return zero if they're partially overlapping in an unsuitable fashion. */
1962 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
1963 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1964 #define MPN_SAME_OR_INCR_P(dst, src, size) \
1965 MPN_SAME_OR_INCR2_P(dst, size, src, size)
1966 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
1967 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1968 #define MPN_SAME_OR_DECR_P(dst, src, size) \
1969 MPN_SAME_OR_DECR2_P(dst, size, src, size)
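/* Concrete behaviour of the overlap predicates (illustrative): */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <assert.h>
int
main (void)
{
  mp_limb_t buf[10];
  assert (  MPN_OVERLAP_P (buf, 5, buf + 3, 5));   /* [0,5) meets [3,8) */
  assert (! MPN_OVERLAP_P (buf, 3, buf + 3, 5));   /* [0,3) ends at [3,8) */
  assert (  MPN_SAME_OR_SEPARATE_P (buf, buf, 5)); /* identical is fine */
  assert (  MPN_SAME_OR_INCR_P (buf, buf + 2, 5)); /* dst below src: ok */
  assert (! MPN_SAME_OR_DECR_P (buf, buf + 2, 5)); /* but not for decr */
  return 0;
}
#endif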
1972 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1973 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1974 does it always. Generally assertions are meant for development, but
1975 might help when looking for a problem later too.
1977 Note that strings shouldn't be used within the ASSERT expression,
1978 eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
1979 used in the !HAVE_STRINGIZE case (ie. K&R). */
1982 #define ASSERT_LINE __LINE__
1984 #define ASSERT_LINE -1
1988 #define ASSERT_FILE __FILE__
1990 #define ASSERT_FILE ""
1993 __GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
1994 __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
1997 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
1999 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
2002 #define ASSERT_ALWAYS(expr) \
2005 ASSERT_FAIL (expr); \
2009 #define ASSERT(expr) ASSERT_ALWAYS (expr)
2011 #define ASSERT(expr) do {} while (0)
2015 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2016 that it's zero. In both cases if assertion checking is disabled the
2017 expression is still evaluated. These macros are meant for use with
2018 routines like mpn_add_n() where the return value represents a carry or
2019 whatever that should or shouldn't occur in some context. For example,
2020 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2022 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
2023 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
2025 #define ASSERT_CARRY(expr) (expr)
2026 #define ASSERT_NOCARRY(expr) (expr)
2030 /* ASSERT_CODE includes code when assertion checking is wanted. This is the
2031 same as writing "#if WANT_ASSERT", but more compact. */
2033 #define ASSERT_CODE(expr) expr
2035 #define ASSERT_CODE(expr)
2039 /* Test that an mpq_t is in fully canonical form. This can be used as
2040 protection on routines like mpq_equal which give wrong results on
2041 non-canonical inputs. */
2043 #define ASSERT_MPQ_CANONICAL(q) \
2045 ASSERT (q->_mp_den._mp_size > 0); \
2046 if (q->_mp_num._mp_size == 0) \
2048 /* zero should be 0/1 */ \
2049 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
2053 /* no common factors */ \
2056 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
2057 ASSERT (mpz_cmp_ui (__g, 1) == 0); \
2062 #define ASSERT_MPQ_CANONICAL(q) do {} while (0)
2065 /* Check that the nail parts are zero. */
2066 #define ASSERT_ALWAYS_LIMB(limb) \
2068 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
2069 ASSERT_ALWAYS (__nail == 0); \
2071 #define ASSERT_ALWAYS_MPN(ptr, size) \
2073 /* let whole loop go dead when no nails */ \
2074 if (GMP_NAIL_BITS != 0) \
2077 for (__i = 0; __i < (size); __i++) \
2078 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
2082 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
2083 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
2085 #define ASSERT_LIMB(limb) do {} while (0)
2086 #define ASSERT_MPN(ptr, size) do {} while (0)
2090 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
2091 size==0 is allowed, and in that case {ptr,size} is considered to be zero. */
2093 #define ASSERT_MPN_ZERO_P(ptr,size) \
2096 ASSERT ((size) >= 0); \
2097 for (__i = 0; __i < (size); __i++) \
2098 ASSERT ((ptr)[__i] == 0); \
2100 #define ASSERT_MPN_NONZERO_P(ptr,size) \
2103 int __nonzero = 0; \
2104 ASSERT ((size) >= 0); \
2105 for (__i = 0; __i < (size); __i++) \
2106 if ((ptr)[__i] != 0) \
2111 ASSERT (__nonzero); \
2114 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
2115 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
2119 #if ! HAVE_NATIVE_mpn_com
2121 #define mpn_com(d,s,n) \
2124 mp_srcptr __s = (s); \
2125 mp_size_t __n = (n); \
2126 ASSERT (__n >= 1); \
2127 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
2129 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
2134 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \
2136 mp_srcptr __up = (up); \
2137 mp_srcptr __vp = (vp); \
2138 mp_ptr __rp = (rp); \
2139 mp_size_t __n = (n); \
2140 mp_limb_t __a, __b; \
2142 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \
2143 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \
2151 __rp[__n] = operation; \
2156 #if ! HAVE_NATIVE_mpn_and_n
2158 #define mpn_and_n(rp, up, vp, n) \
2159 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2162 #if ! HAVE_NATIVE_mpn_andn_n
2164 #define mpn_andn_n(rp, up, vp, n) \
2165 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2168 #if ! HAVE_NATIVE_mpn_nand_n
2170 #define mpn_nand_n(rp, up, vp, n) \
2171 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2174 #if ! HAVE_NATIVE_mpn_ior_n
2176 #define mpn_ior_n(rp, up, vp, n) \
2177 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2180 #if ! HAVE_NATIVE_mpn_iorn_n
2182 #define mpn_iorn_n(rp, up, vp, n) \
2183 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2186 #if ! HAVE_NATIVE_mpn_nior_n
2188 #define mpn_nior_n(rp, up, vp, n) \
2189 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2192 #if ! HAVE_NATIVE_mpn_xor_n
2194 #define mpn_xor_n(rp, up, vp, n) \
2195 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2198 #if ! HAVE_NATIVE_mpn_xnor_n
2200 #define mpn_xnor_n(rp, up, vp, n) \
2201 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
2204 #define mpn_trialdiv __MPN(trialdiv)
2205 __GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
2207 #define mpn_remove __MPN(remove)
2208 __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t));
2211 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2212 #if GMP_NAIL_BITS == 0
2213 #define ADDC_LIMB(cout, w, x, y) \
2215 mp_limb_t __x = (x); \
2216 mp_limb_t __y = (y); \
2217 mp_limb_t __w = __x + __y; \
2219 (cout) = __w < __x; \
2222 #define ADDC_LIMB(cout, w, x, y) \
2228 (w) = __w & GMP_NUMB_MASK; \
2229 (cout) = __w >> GMP_NUMB_BITS; \
2233 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2235 #if GMP_NAIL_BITS == 0
2236 #define SUBC_LIMB(cout, w, x, y) \
2238 mp_limb_t __x = (x); \
2239 mp_limb_t __y = (y); \
2240 mp_limb_t __w = __x - __y; \
2242 (cout) = __w > __x; \
2245 #define SUBC_LIMB(cout, w, x, y) \
2247 mp_limb_t __w = (x) - (y); \
2248 (w) = __w & GMP_NUMB_MASK; \
2249 (cout) = __w >> (GMP_LIMB_BITS-1); \
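/* Behaviour check for the nail-free forms (illustrative): carry is "the sum
   wrapped below an addend", borrow is "the difference above the minuend". */
#if 0 /* standalone illustration */
#include <assert.h>
int
main (void)
{
  mp_limb_t w, c;
  ADDC_LIMB (c, w, MP_LIMB_T_MAX, CNST_LIMB(2));  /* wraps: w == 1 */
  assert (c == 1 && w == 1);
  SUBC_LIMB (c, w, CNST_LIMB(0), CNST_LIMB(1));   /* borrows: w == MAX */
  assert (c == 1 && w == MP_LIMB_T_MAX);
  return 0;
}
#endif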
2254 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2255 expecting no carry (or borrow) from that.
2257 The size parameter is only for the benefit of assertion checking. In a
2258 normal build it's unused and the carry/borrow is just propagated as far
2259 as it needs to go.
2261 On random data, usually only one or two limbs of {ptr,size} get updated,
2262 so there's no need for any sophisticated looping, just something compact
2263 and sensible.
2265 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2266 declaring their operand sizes, then remove the former. This is purely
2267 for the benefit of assertion checking. */
2269 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
2270 && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
2271 /* Better flags handling than the generic C gives on i386, saving a few
2272 bytes of code and maybe a cycle or two. */
2274 #define MPN_IORD_U(ptr, incr, aors) \
2276 mp_ptr __ptr_dummy; \
2277 if (__builtin_constant_p (incr) && (incr) == 1) \
2279 __asm__ __volatile__ \
2280 ("\n" ASM_L(top) ":\n" \
2281 "\t" aors " $1, (%0)\n" \
2282 "\tleal 4(%0),%0\n" \
2283 "\tjc " ASM_L(top) \
2284 : "=r" (__ptr_dummy) \
2290 __asm__ __volatile__ \
2291 ( aors " %2,(%0)\n" \
2292 "\tjnc " ASM_L(done) "\n" \
2294 "\t" aors " $1,4(%0)\n" \
2295 "\tleal 4(%0),%0\n" \
2296 "\tjc " ASM_L(top) "\n" \
2298 : "=r" (__ptr_dummy) \
2305 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
2306 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
2307 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
2308 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
2311 #if GMP_NAIL_BITS == 0
2313 #define mpn_incr_u(p,incr) \
2317 if (__builtin_constant_p (incr) && (incr) == 1) \
2319 while (++(*(__p++)) == 0) \
2324 __x = *__p + (incr); \
2327 while (++(*(++__p)) == 0) \
2333 #define mpn_decr_u(p,incr) \
2337 if (__builtin_constant_p (incr) && (incr) == 1) \
2339 while ((*(__p++))-- == 0) \
2345 *__p = __x - (incr); \
2347 while ((*(++__p))-- == 0) \
2354 #if GMP_NAIL_BITS >= 1
2356 #define mpn_incr_u(p,incr) \
2360 if (__builtin_constant_p (incr) && (incr) == 1) \
2364 __x = (*__p + 1) & GMP_NUMB_MASK; \
2371 __x = (*__p + (incr)); \
2372 *__p++ = __x & GMP_NUMB_MASK; \
2373 if (__x >> GMP_NUMB_BITS != 0) \
2377 __x = (*__p + 1) & GMP_NUMB_MASK; \
2386 #define mpn_decr_u(p,incr) \
2390 if (__builtin_constant_p (incr) && (incr) == 1) \
2395 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2401 __x = *__p - (incr); \
2402 *__p++ = __x & GMP_NUMB_MASK; \
2403 if (__x >> GMP_NUMB_BITS != 0) \
2408 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2419 #define MPN_INCR_U(ptr, size, n) \
2421 ASSERT ((size) >= 1); \
2422 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
2425 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
2431 #define MPN_DECR_U(ptr, size, n) \
2433 ASSERT ((size) >= 1); \
2434 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
2437 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
2442 /* Structure for conversion between internal binary format and
2443 strings in base 2..36. */
2446 /* Number of digits in the conversion base that always fits in an mp_limb_t.
2447 For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2448 is 9, since 10**9 is the largest power of the base that fits into a mp_limb_t. */
2451 /* log(2)/log(conversion_base) */
2452 double chars_per_bit_exactly;
2454 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2455 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
2456 i.e. the number of bits used to represent each digit in the base. */
2459 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2460 fixed-point number. Instead of dividing by big_base an application can
2461 choose to multiply by big_base_inverted. */
2462 mp_limb_t big_base_inverted;
2465 #define mp_bases __MPN(bases)
2466 __GMP_DECLSPEC extern const struct bases mp_bases[257];
2469 /* For power of 2 bases this is exact. For other bases the result is either
2470 exact or one too big.
2472 To be exact always it'd be necessary to examine all the limbs of the
2473 operand, since numbers like 100..000 and 99...999 generally differ only
2474 in the lowest limb. It'd be possible to examine just a couple of high
2475 limbs to increase the probability of being exact, but that doesn't seem
2476 worth bothering with. */
2478 #define MPN_SIZEINBASE(result, ptr, size, base) \
2480 int __lb_base, __cnt; \
2483 ASSERT ((size) >= 0); \
2484 ASSERT ((base) >= 2); \
2485 ASSERT ((base) < numberof (mp_bases)); \
2487 /* Special case for X == 0. */ \
2492 /* Calculate the total number of significant bits of X. */ \
2493 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2494 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2496 if (POW2_P (base)) \
2498 __lb_base = mp_bases[base].big_base; \
2499 (result) = (__totbits + __lb_base - 1) / __lb_base; \
2502 (result) = (size_t) \
2503 (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \
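/* Worked instance (illustrative): for base 10, chars_per_bit_exactly is
   log(2)/log(10) ~= 0.30103, so a 64-bit operand gets 64*0.30103 + 1 = 20
   digits -- exact here, since 2^64-1 = 18446744073709551615 is 20 digits,
   though in general the estimate can be one too big as noted.  For base 16
   the power-of-2 branch gives (64 + 4-1)/4 = 16 digits, always exact. */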
2507 /* eliminate mp_bases lookups for base==16 */
2508 #define MPN_SIZEINBASE_16(result, ptr, size) \
2511 mp_size_t __totbits; \
2513 ASSERT ((size) >= 0); \
2515 /* Special case for X == 0. */ \
2520 /* Calculate the total number of significant bits of X. */ \
2521 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2522 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2523 (result) = (__totbits + 4 - 1) / 4; \
2527 /* bit count to limb count, rounding up */
2528 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2530 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2531 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2532 in both cases (LIMBS_PER_ULONG is also defined here.) */
2533 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2535 #define LIMBS_PER_ULONG 1
2536 #define MPN_SET_UI(zp, zn, u) \
2537 (zp)[0] = (u); \
2538 (zn) = ((zp)[0] != 0);
2539 #define MPZ_FAKE_UI(z, zp, u) \
2540 (zp)[0] = (u); \
2541 PTR (z) = (zp); \
2542 SIZ (z) = ((zp)[0] != 0); \
2543 ASSERT_CODE (ALLOC (z) = 1);
2545 #else /* need two limbs per ulong */
2547 #define LIMBS_PER_ULONG 2
2548 #define MPN_SET_UI(zp, zn, u) \
2549 (zp)[0] = (u) & GMP_NUMB_MASK; \
2550 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2551 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2552 #define MPZ_FAKE_UI(z, zp, u) \
2553 (zp)[0] = (u) & GMP_NUMB_MASK; \
2554 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2555 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2556 PTR (z) = (zp); \
2557 ASSERT_CODE (ALLOC (z) = 2);
2562 #if HAVE_HOST_CPU_FAMILY_x86
2563 #define TARGET_REGISTER_STARVED 1
2565 #define TARGET_REGISTER_STARVED 0
2569 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2570 or 0 there into a limb 0xFF..FF or 0 respectively.
2572 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2573 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2574 a little compile-time test and a fallback to a "? :" form. The latter is
2575 necessary for instance on Cray vector systems.
2577 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2578 to an arithmetic right shift anyway, but it's good to get the desired
2579 shift on past versions too (in particular since an important use of
2580 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
2582 #define LIMB_HIGHBIT_TO_MASK(n) \
2583 (((mp_limb_signed_t) -1 >> 1) < 0 \
2584 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
2585 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
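/* For instance (illustrative), with 32-bit limbs both branches give
   LIMB_HIGHBIT_TO_MASK(0x80000000) == 0xFFFFFFFF and
   LIMB_HIGHBIT_TO_MASK(0x7FFFFFFF) == 0. */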
2588 /* Use a library function for invert_limb, if available. */
2589 #define mpn_invert_limb __MPN(invert_limb)
2590 __GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
2591 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2592 #define invert_limb(invxl,xl) \
2594 (invxl) = mpn_invert_limb (xl); \
2599 #define invert_limb(invxl,xl) \
2600 do { \
2601 mp_limb_t dummy; \
2602 ASSERT ((xl) != 0); \
2603 udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl); \
2604 } while (0)
2607 #define invert_pi1(dinv, d1, d0) \
2609 mp_limb_t v, p, t1, t0, mask; \
2610 invert_limb (v, d1); \
2616 mask = -(p >= d1); \
2621 umul_ppmm (t1, t0, d0, v); \
2626 if (UNLIKELY (p >= d1)) \
2628 if (p > d1 || t0 >= d0) \
2636 #ifndef udiv_qrnnd_preinv
2637 #define udiv_qrnnd_preinv udiv_qrnnd_preinv3
2640 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
2641 limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
2642 If this would yield overflow, DI should be the largest possible number
2643 (i.e., only ones). For correct operation, the most significant bit of D
2644 has to be set. Put the quotient in Q and the remainder in R. */
2645 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \
2647 mp_limb_t _q, _ql, _r; \
2648 mp_limb_t _xh, _xl; \
2649 ASSERT ((d) != 0); \
2650 umul_ppmm (_q, _ql, (nh), (di)); \
2651 _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \
2652 umul_ppmm (_xh, _xl, _q, (d)); \
2653 sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
2656 sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
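/* A toy instance of the scheme (illustrative), scaled to B = 2^8 so the
   numbers stay readable: for d = 200 (high bit set),
   di = floor((B^2-1)/d) - B = floor(65535/200) - 256 = 327 - 256 = 71.
   Dividing nh:nl = 195:80 (n = 50000):
   _q = hi(195*71) + 195 = floor(13845/256) + 195 = 54 + 195 = 249,
   remainder 50000 - 249*200 = 200 >= d, so one adjustment step lands on
   q = 250, r = 0, which is right: 50000 = 250 * 200. */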
2673 /* Like udiv_qrnnd_preinv, but branch-free. */
2674 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \
2676 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
2677 mp_limb_t _xh, _xl; \
2678 _n2 = (nh); \
2679 _n10 = (nl); \
2680 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
2681 _nadj = _n10 + (_nmask & (d)); \
2682 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
2683 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
2685 umul_ppmm (_xh, _xl, _q1, d); \
2686 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
2687 _xh -= (d); /* xh = 0 or -1 */ \
2688 (r) = _xl + ((d) & _xh); \
2692 /* Like udiv_qrnnd_preinv2, but for any value D. DNORM is D shifted left
2693 so that its most significant bit is set. LGUP is ceil(log2(D)). */
2694 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
2696 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
2697 mp_limb_t _xh, _xl; \
2698 _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> ((lgup) - 1)); \
2699 _n10 = (nl) << (GMP_LIMB_BITS - (lgup)); \
2700 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
2701 _nadj = _n10 + (_nmask & (dnorm)); \
2702 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
2703 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
2705 umul_ppmm (_xh, _xl, _q1, d); \
2706 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
2708 (r) = _xl + ((d) & _xh); \
2712 /* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
2714 We write things strangely below, to help gcc. A more straightforward
2717 _r = (nl) - _qh * (d);
2725 For one operation shorter critical path, one may want to use this form:
2737 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
2739 mp_limb_t _qh, _ql, _r; \
2740 umul_ppmm (_qh, _ql, (nh), (di)); \
2741 if (__builtin_constant_p (nl) && (nl) == 0) \
2744 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
2745 _r = (nl) - _qh * (d); \
2746 if (_r > _ql) /* both > and >= should be OK */ \
2751 if (UNLIKELY (_r >= (d))) \
2760 /* Compute r = nh*B mod d, where di is the inverse of d. */
2761 #define udiv_rnd_preinv(r, nh, d, di) \
2763 mp_limb_t _qh, _ql, _r; \
2764 umul_ppmm (_qh, _ql, (nh), (di)); \
2772 /* Compute the quotient and remainder for n / d. Requires d
2773 >= B^2 / 2 and n < d B. di is the inverse
2775 floor ((B^3 - 1) / (d0 + d1 B)) - B.
2777 NOTE: Output variables are updated multiple times. Only some inputs
2778 and outputs may overlap.
2780 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
2782 mp_limb_t _q0, _t1, _t0, _mask; \
2783 umul_ppmm ((q), _q0, (n2), (dinv)); \
2784 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
2786 /* Compute the two most significant limbs of n - q'd */ \
2787 (r1) = (n1) - (d1) * (q); \
2789 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
2790 umul_ppmm (_t1, _t0, (d0), (q)); \
2791 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
2794 /* Conditionally adjust q and the remainders */ \
2795 _mask = - (mp_limb_t) ((r1) >= _q0); \
2797 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
2798 if (UNLIKELY ((r1) >= (d1))) \
2800 if ((r1) > (d1) || (r0) >= (d0)) \
2803 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
2808 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
2809 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
2810 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
2814 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
2815 plain mpn_divrem_1. The default is yes, since the few CISC chips where
2816 preinv is not good have defines saying so. */
2817 #ifndef USE_PREINV_DIVREM_1
2818 #define USE_PREINV_DIVREM_1 1
2821 #if USE_PREINV_DIVREM_1
2822 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
2823 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
2825 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
2826 mpn_divrem_1 (qp, xsize, ap, size, d)
2829 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
2830 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
2833 /* This selection may seem backwards. The reason mpn_mod_1 typically takes
2834 over for larger sizes is that it uses the mod_1_1 function. */
2835 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
2836 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
2837 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
2838 : mpn_mod_1 (src, size, divisor))
2841 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
2842 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
2843 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
2847 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2848 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
2849 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
2850 modexact are faster at all sizes, so the defaults are 0. Those CPUs
2851 where this is not right have a tuned threshold. */
2852 #ifndef DIVEXACT_1_THRESHOLD
2853 #define DIVEXACT_1_THRESHOLD 0
2855 #ifndef BMOD_1_TO_MOD_1_THRESHOLD
2856 #define BMOD_1_TO_MOD_1_THRESHOLD 10
2859 #ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
2860 #define mpn_divexact_1 __MPN(divexact_1)
2861 __GMP_DECLSPEC void mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
2864 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \
2866 if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \
2867 ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
2870 ASSERT (mpn_mod_1 (src, size, divisor) == 0); \
2871 mpn_divexact_1 (dst, src, size, divisor); \
2875 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
2876 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
2877 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2880 #if HAVE_NATIVE_mpn_modexact_1_odd
2881 #define mpn_modexact_1_odd __MPN(modexact_1_odd)
2882 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2884 #define mpn_modexact_1_odd(src,size,divisor) \
2885 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
2888 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
2889 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
2890 ? mpn_modexact_1_odd (src, size, divisor) \
2891 : mpn_mod_1 (src, size, divisor))
2893 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
2894 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2895 n must be odd (otherwise such an inverse doesn't exist).
2897 This is not to be confused with invert_limb(), which is completely
2898 different.
2900 The table lookup gives an inverse with the low 8 bits valid, and each
2901 multiply step doubles the number of bits. See Jebelean "An algorithm for
2902 exact division" end of section 4 (reference in gmp.texi).
2904 Possible enhancement: Could use UHWtype until the last step, if half-size
2905 multiplies are faster (might help under _LONG_LONG_LIMB).
2907 Alternative: As noted in Granlund and Montgomery "Division by Invariant
2908 Integers using Multiplication" (reference in gmp.texi), n itself gives a
2909 3-bit inverse immediately, and could be used instead of a table lookup.
2910 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2911 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
2913 #define binvert_limb_table __gmp_binvert_limb_table
2914 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128];
2916 #define binvert_limb(inv,n) \
2918 mp_limb_t __n = (n); \
2920 ASSERT ((__n & 1) == 1); \
2922 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \
2923 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \
2924 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \
2925 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \
2927 if (GMP_NUMB_BITS > 64) \
2929 int __invbits = 64; \
2930 do { \
2931 __inv = 2 * __inv - __inv * __inv * __n; \
2932 __invbits *= 2; \
2933 } while (__invbits < GMP_NUMB_BITS); \
2936 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
2937 (inv) = __inv & GMP_NUMB_MASK; \
2939 #define modlimb_invert binvert_limb /* backward compatibility */
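/* The quadratic convergence is easy to check standalone (illustrative):
   any odd n is its own inverse mod 8, i.e. 3 valid bits, and each
   inv = 2*inv - inv*inv*n step doubles the count of valid low bits. */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <assert.h>
#include <stdint.h>
int
main (void)
{
  uint32_t n = 0x12345679u;   /* any odd value */
  uint32_t inv = n;           /* n*n == 1 (mod 8): 3 bits correct */
  int i;
  for (i = 0; i < 4; i++)     /* 3 -> 6 -> 12 -> 24 -> 48 bits */
    inv = 2 * inv - inv * inv * n;
  assert ((uint32_t) (inv * n) == 1);
  return 0;
}
#endif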
2941 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2942 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2943 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2944 we need to start from GMP_NUMB_MAX>>1. */
2945 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2947 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2948 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2949 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
2950 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
2953 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
2955 It's not clear whether this is the best way to do this calculation.
2956 Anything congruent to -a would be fine for the one limb congruence
2959 #define NEG_MOD(r, a, d) \
2961 ASSERT ((d) != 0); \
2967 /* small a is reasonably likely */ \
2973 mp_limb_t __dnorm; \
2974 count_leading_zeros (__twos, d); \
2975 __twos -= GMP_NAIL_BITS; \
2976 __dnorm = (d) << __twos; \
2977 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
2983 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2984 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
2987 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
2988 to 0 if there's an even number. "n" should be an unsigned long and "p"
2989 an int. */
2991 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2992 #define ULONG_PARITY(p, n) \
2995 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
3000 /* Cray intrinsic _popcnt. */
3002 #define ULONG_PARITY(p, n) \
3004 (p) = _popcnt (n) & 1; \
3008 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3009 && ! defined (NO_ASM) && defined (__ia64)
3010 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
3011 to a 64 bit unsigned long long for popcnt */
3012 #define ULONG_PARITY(p, n) \
3014 unsigned long long __n = (unsigned long) (n); \
3016 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
3021 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3022 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3023 #if __GMP_GNUC_PREREQ (3,1)
3024 #define __GMP_qm "=Qm"
3025 #define __GMP_q "=Q"
3027 #define __GMP_qm "=qm"
3028 #define __GMP_q "=q"
3030 #define ULONG_PARITY(p, n) \
3033 unsigned long __n = (n); \
3034 __n ^= (__n >> 16); \
3035 __asm__ ("xorb %h1, %b1\n\t" \
3037 : __GMP_qm (__p), __GMP_q (__n) \
3043 #if ! defined (ULONG_PARITY)
3044 #define ULONG_PARITY(p, n) \
3046 unsigned long __n = (n); \
3050 __p ^= 0x96696996L >> (__n & 0x1F); \
3060 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
3061 version 3.1 at least) doesn't seem to know how to generate rlwimi for
3062 anything other than bit-fields, so use "asm". */
3063 #if defined (__GNUC__) && ! defined (NO_ASM) \
3064 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3065 #define BSWAP_LIMB(dst, src) \
3067 mp_limb_t __bswapl_src = (src); \
3068 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
3069 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
3070 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
3071 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
3072 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 2nd high */ \
3073 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
3074 (dst) = __tmp1 | __tmp2; /* whole */ \
3078 /* bswap is available on i486 and up and is fast. A combination rorw $8 /
3079 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3080 kernel with xchgb instead of rorw), but this is not done here, because
3081 i386 means generic x86 and mixing word and dword operations will cause
3082 partial register stalls on P6 chips. */
3083 #if defined (__GNUC__) && ! defined (NO_ASM) \
3084 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
3085 && GMP_LIMB_BITS == 32
3086 #define BSWAP_LIMB(dst, src) \
3088 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
3092 #if defined (__GNUC__) && ! defined (NO_ASM) \
3093 && defined (__amd64__) && GMP_LIMB_BITS == 64
3094 #define BSWAP_LIMB(dst, src) \
3096 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
3100 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3101 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3102 #define BSWAP_LIMB(dst, src) \
3104 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
3109 #if defined (__GNUC__) && ! defined (NO_ASM) \
3110 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3111 #define BSWAP_LIMB(dst, src) \
3113 mp_limb_t __bswapl_src = (src); \
3114 __asm__ ("ror%.w %#8, %0\n\t" \
3118 : "0" (__bswapl_src)); \
3122 #if ! defined (BSWAP_LIMB)
3123 #if GMP_LIMB_BITS == 8
3124 #define BSWAP_LIMB(dst, src) \
3125 do { (dst) = (src); } while (0)
3127 #if GMP_LIMB_BITS == 16
3128 #define BSWAP_LIMB(dst, src) \
3130 (dst) = ((src) << 8) + ((src) >> 8); \
3133 #if GMP_LIMB_BITS == 32
3134 #define BSWAP_LIMB(dst, src) \
3138 + (((src) & 0xFF00) << 8) \
3139 + (((src) >> 8) & 0xFF00) \
3143 #if GMP_LIMB_BITS == 64
3144 #define BSWAP_LIMB(dst, src) \
3148 + (((src) & 0xFF00) << 40) \
3149 + (((src) & 0xFF0000) << 24) \
3150 + (((src) & 0xFF000000) << 8) \
3151 + (((src) >> 8) & 0xFF000000) \
3152 + (((src) >> 24) & 0xFF0000) \
3153 + (((src) >> 40) & 0xFF00) \
3159 #if ! defined (BSWAP_LIMB)
3160 #define BSWAP_LIMB(dst, src) \
3162 mp_limb_t __bswapl_src = (src); \
3163 mp_limb_t __dst = 0; \
3165 for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
3167 __dst = (__dst << 8) | (__bswapl_src & 0xFF); \
3168 __bswapl_src >>= 8; \
3175 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3176 those we know are fast. */
3177 #if defined (__GNUC__) && ! defined (NO_ASM) \
3178 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3179 && (HAVE_HOST_CPU_powerpc604 \
3180 || HAVE_HOST_CPU_powerpc604e \
3181 || HAVE_HOST_CPU_powerpc750 \
3182 || HAVE_HOST_CPU_powerpc7400)
3183 #define BSWAP_LIMB_FETCH(limb, src) \
3185 mp_srcptr __blf_src = (src); \
3187 __asm__ ("lwbrx %0, 0, %1" \
3189 : "r" (__blf_src), \
3190 "m" (*__blf_src)); \
3195 #if ! defined (BSWAP_LIMB_FETCH)
3196 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
3200 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3201 know are fast. FIXME: Is this necessary? */
3202 #if defined (__GNUC__) && ! defined (NO_ASM) \
3203 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3204 && (HAVE_HOST_CPU_powerpc604 \
3205 || HAVE_HOST_CPU_powerpc604e \
3206 || HAVE_HOST_CPU_powerpc750 \
3207 || HAVE_HOST_CPU_powerpc7400)
3208 #define BSWAP_LIMB_STORE(dst, limb) \
3210 mp_ptr __dst = (dst); \
3211 mp_limb_t __limb = (limb); \
3212 __asm__ ("stwbrx %1, 0, %2" \
3219 #if ! defined (BSWAP_LIMB_STORE)
3220 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
3224 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3225 #define MPN_BSWAP(dst, src, size) \
3227 mp_ptr __dst = (dst); \
3228 mp_srcptr __src = (src); \
3229 mp_size_t __size = (size); \
3231 ASSERT ((size) >= 0); \
3232 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
3233 CRAY_Pragma ("_CRI ivdep"); \
3234 for (__i = 0; __i < __size; __i++) \
3236 BSWAP_LIMB_FETCH (*__dst, __src); \
3242 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3243 #define MPN_BSWAP_REVERSE(dst, src, size) \
3245 mp_ptr __dst = (dst); \
3246 mp_size_t __size = (size); \
3247 mp_srcptr __src = (src) + __size - 1; \
3249 ASSERT ((size) >= 0); \
3250 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
3251 CRAY_Pragma ("_CRI ivdep"); \
3252 for (__i = 0; __i < __size; __i++) \
3254 BSWAP_LIMB_FETCH (*__dst, __src); \
3261 /* No processor claiming to be SPARC v9 compliant seems to
3262 implement the POPC instruction. Disable pattern for now. */
3264 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3265 #define popc_limb(result, input) \
3268 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
3273 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3274 #define popc_limb(result, input) \
3276 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
3280 /* Cray intrinsic. */
3282 #define popc_limb(result, input) \
3284 (result) = _popcnt (input); \
3288 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3289 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3290 #define popc_limb(result, input) \
3292 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
3296 /* Cool population count of an mp_limb_t.
3297 You have to figure out how this works; we won't tell you!
3299 The constants could also be expressed as:
3300 0x55... = [2^N / 3] = [(2^N-1)/3]
3301 0x33... = [2^N / 5] = [(2^N-1)/5]
3302 0x0f... = [2^N / 17] = [(2^N-1)/17]
3303 (N is GMP_LIMB_BITS, [] denotes truncation.) */
3305 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3306 #define popc_limb(result, input) \
3308 mp_limb_t __x = (input); \
3309 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3310 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3311 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3312 (result) = __x & 0xff; \
3316 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3317 #define popc_limb(result, input) \
3319 mp_limb_t __x = (input); \
3320 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3321 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3322 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3323 __x = ((__x >> 8) + __x); \
3324 (result) = __x & 0xff; \
3328 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3329 #define popc_limb(result, input) \
3331 mp_limb_t __x = (input); \
3332 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3333 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3334 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3335 __x = ((__x >> 8) + __x); \
3336 __x = ((__x >> 16) + __x); \
3337 (result) = __x & 0xff; \
3341 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3342 #define popc_limb(result, input) \
3344 mp_limb_t __x = (input); \
3345 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3346 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3347 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3348 __x = ((__x >> 8) + __x); \
3349 __x = ((__x >> 16) + __x); \
3350 __x = ((__x >> 32) + __x); \
3351 (result) = __x & 0xff; \
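/* The masks are the usual SWAR popcount constants: MP_LIMB_T_MAX/3 is
   0x55...55, /5 is 0x33...33, /17 is 0x0F...0F.  A quick check of the
   32- or 64-bit flavours (illustrative): */
#if 0 /* standalone illustration */
#include <assert.h>
int
main (void)
{
  mp_limb_t r;
  popc_limb (r, CNST_LIMB(0xF0F0F0F0));  /* sixteen 1 bits */
  assert (r == 16);
  return 0;
}
#endif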
3356 /* Define stuff for longlong.h. */
3357 #if HAVE_ATTRIBUTE_MODE
3358 typedef unsigned int UQItype __attribute__ ((mode (QI)));
3359 typedef int SItype __attribute__ ((mode (SI)));
3360 typedef unsigned int USItype __attribute__ ((mode (SI)));
3361 typedef int DItype __attribute__ ((mode (DI)));
3362 typedef unsigned int UDItype __attribute__ ((mode (DI)));
3364 typedef unsigned char UQItype;
3365 typedef long SItype;
3366 typedef unsigned long USItype;
3368 typedef long long int DItype;
3369 typedef unsigned long long int UDItype;
3370 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
3371 typedef long int DItype;
3372 typedef unsigned long int UDItype;
3376 typedef mp_limb_t UWtype;
3377 typedef unsigned int UHWtype;
3378 #define W_TYPE_SIZE GMP_LIMB_BITS
3380 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3382 Bit field packing is "implementation defined" according to C99, which
3383 leaves us at the compiler's mercy here. For some systems packing is
3384 defined in the ABI (eg. x86). In any case so far it seems universal that
3385 little endian systems pack from low to high, and big endian from high to
3386 low within the given type.
3388 Within the fields we rely on the integer endianness being the same as the
3389 float endianness, this is true everywhere we know of and it'd be a fairly
3390 strange system that did anything else. */
3392 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3393 #define _GMP_IEEE_FLOATS 1
3394 union ieee_double_extract
3398 gmp_uint_least32_t manh:20;
3399 gmp_uint_least32_t exp:11;
3400 gmp_uint_least32_t sig:1;
3401 gmp_uint_least32_t manl:32;
3407 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3408 #define _GMP_IEEE_FLOATS 1
3409 union ieee_double_extract
3413 gmp_uint_least32_t manl:32;
3414 gmp_uint_least32_t manh:20;
3415 gmp_uint_least32_t exp:11;
3416 gmp_uint_least32_t sig:1;
3422 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3423 #define _GMP_IEEE_FLOATS 1
3424 union ieee_double_extract
3428 gmp_uint_least32_t sig:1;
3429 gmp_uint_least32_t exp:11;
3430 gmp_uint_least32_t manh:20;
3431 gmp_uint_least32_t manl:32;
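/* Field check (illustrative, assuming one of the IEEE layouts above, with
   the union's double member named d as DOUBLE_NAN_INF_ACTION uses below):
   1.0 carries biased exponent 1023 and a zero mantissa; -2.0 sets sig with
   exponent 1024. */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <assert.h>
int
main (void)
{
  union ieee_double_extract u;
  u.d = 1.0;
  assert (u.s.exp == 1023 && u.s.manh == 0 && u.s.manl == 0 && u.s.sig == 0);
  u.d = -2.0;
  assert (u.s.exp == 1024 && u.s.sig == 1);
  return 0;
}
#endif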
3438 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3439 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
3440 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3441 /* Maximum number of limbs it will take to store any `double'.
3442 We assume doubles have 53 mantissa bits. */
3443 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3445 __GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
3447 #define mpn_get_d __gmpn_get_d
3448 __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
3451 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3452 a_inf if x is an infinity. Both are considered unlikely values, for
3453 branch prediction. */
3455 #if _GMP_IEEE_FLOATS
3456 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3458 union ieee_double_extract u; \
3459 u.d = (x); \
3460 if (UNLIKELY (u.s.exp == 0x7FF)) \
3462 if (u.s.manl == 0 && u.s.manh == 0) \
3470 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3471 /* no nans or infs in these formats */
3472 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3476 #ifndef DOUBLE_NAN_INF_ACTION
3477 /* Unknown format, try something generic.
3478 NaN should be "unordered", so x!=x.
3479 Inf should be bigger than DBL_MAX. */
3480 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3483 if (UNLIKELY ((x) != (x))) \
3485 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
3491 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3492 in the coprocessor, which means a bigger exponent range than normal, and
3493 depending on the rounding mode, a bigger mantissa than normal. (See
3494 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
3495 "d" through memory to force any rounding and overflows to occur.
3497 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3498 registers, where there's no such extra precision and no need for the
3499 FORCE_DOUBLE. We don't bother to detect this since the present uses for
3500 FORCE_DOUBLE are only in test programs and default generic C code.
3502 Not quite sure that an "automatic volatile" will use memory, but it does
3503 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3504 apparently matching operands like "0" are only allowed on a register
3505 output. gcc 3.4 warns about this, though in fact it and past versions
3506 seem to put the operand through memory as hoped. */
3508 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
3509 || defined (__amd64__))
3510 #define FORCE_DOUBLE(d) \
3511 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3513 #define FORCE_DOUBLE(d) do { } while (0)
3517 __GMP_DECLSPEC extern int __gmp_junk;
3518 __GMP_DECLSPEC extern const int __gmp_0;
3519 __GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
3520 __GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3521 __GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3522 __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3523 #define GMP_ERROR(code) __gmp_exception (code)
3524 #define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
3525 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
3527 #if defined _LONG_LONG_LIMB
3528 #if __GMP_HAVE_TOKEN_PASTE
3529 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3531 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
3533 #else /* not _LONG_LONG_LIMB */
3534 #if __GMP_HAVE_TOKEN_PASTE
3535 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3537 #define CNST_LIMB(C) ((mp_limb_t) C/**/L)
3539 #endif /* _LONG_LONG_LIMB */
3541 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3542 #if GMP_NUMB_BITS == 2
3543 #define PP 0x3 /* 3 */
3544 #define PP_FIRST_OMITTED 5
3546 #if GMP_NUMB_BITS == 4
3547 #define PP 0xF /* 3 x 5 */
3548 #define PP_FIRST_OMITTED 7
3550 #if GMP_NUMB_BITS == 8
3551 #define PP 0x69 /* 3 x 5 x 7 */
3552 #define PP_FIRST_OMITTED 11
3554 #if GMP_NUMB_BITS == 16
3555 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
3556 #define PP_FIRST_OMITTED 17
3558 #if GMP_NUMB_BITS == 32
3559 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
3560 #define PP_INVERTED 0x53E5645CL
3561 #define PP_FIRST_OMITTED 31
3563 #if GMP_NUMB_BITS == 64
3564 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
3565 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3566 #define PP_FIRST_OMITTED 59
3568 #ifndef PP_FIRST_OMITTED
3569 #define PP_FIRST_OMITTED 3
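/* For reference (illustrative): the 32-bit PP is literally the odd primorial
   3*5*7*11*13*17*19*23*29 = 3234846615 = 0xC0CFD797, PP_INVERTED is its
   precomputed invert_limb()-style reciprocal (PP already has its high bit
   set), and 31 is the first prime not folded in, hence PP_FIRST_OMITTED. */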
3574 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3575 zero bit representing +1 and a one bit representing -1. Bits other than
3576 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
3577 used to ensure the expressions are "int"s even if a and/or b might be
3578 other types.
3580 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3581 and their speed is important. Expressions are used rather than
3582 conditionals to accumulate sign changes, which effectively means XORs
3583 instead of conditional JUMPs. */
3585 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3586 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3588 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3589 #define JACOBI_U0(a) ((a) == 1)
3591 /* (a/0), with a given by low and size;
3592 is 1 if a=+/-1, 0 otherwise */
3593 #define JACOBI_LS0(alow,asize) \
3594 (((asize) == 1 || (asize) == -1) && (alow) == 1)
3596 /* (a/0), with a an mpz_t;
3597 fetch of low limb always valid, even if size is zero */
3598 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
3600 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3601 #define JACOBI_0U(b) ((b) == 1)
3603 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3604 #define JACOBI_0S(b) ((b) == 1 || (b) == -1)
3606 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3607 #define JACOBI_0LS(blow,bsize) \
3608 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3610 /* Convert a bit1 to +1 or -1. */
3611 #define JACOBI_BIT1_TO_PN(result_bit1) \
3612 (1 - ((int) (result_bit1) & 2))
3614 /* (2/b), with b unsigned and odd;
3615 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3616 hence obtained from (b>>1)^b */
3617 #define JACOBI_TWO_U_BIT1(b) \
3618 ((int) (((b) >> 1) ^ (b)))
3620 /* (2/b)^twos, with b unsigned and odd */
3621 #define JACOBI_TWOS_U_BIT1(twos, b) \
3622 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3624 /* (2/b)^twos, with b unsigned and odd */
3625 #define JACOBI_TWOS_U(twos, b) \
3626 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
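/* Spot check of the bit-1 encoding (illustrative): (2/7) = +1 since
   7 == 7 mod 8, while (2/3) = -1 since 3 == 3 mod 8. */
#if 0 /* standalone illustration */
#include <assert.h>
int
main (void)
{
  assert (JACOBI_BIT1_TO_PN (JACOBI_TWO_U_BIT1 (7)) == 1);
  assert (JACOBI_BIT1_TO_PN (JACOBI_TWO_U_BIT1 (3)) == -1);
  assert (JACOBI_TWOS_U (2, 3) == 1);    /* (2/3)^2 = +1 */
  assert (JACOBI_TWOS_U (3, 3) == -1);   /* (2/3)^3 = -1 */
  return 0;
}
#endif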
3628 /* (-1/b), with b odd (signed or unsigned);
3629 is (-1)^((b-1)/2) */
3630 #define JACOBI_N1B_BIT1(b) \
3631 ((int) (b))
3633 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3634 is (-1/b) if a<0, or +1 if a>=0 */
3635 #define JACOBI_ASGN_SU_BIT1(a, b) \
3636 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3638 /* (a/b) effect due to sign of b: signed/signed;
3639 is -1 if a and b both negative, +1 otherwise */
3640 #define JACOBI_BSGN_SS_BIT1(a, b) \
3641 ((((a)<0) & ((b)<0)) << 1)
3643 /* (a/b) effect due to sign of b: signed/mpz;
3644 is -1 if a and b both negative, +1 otherwise */
3645 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3646 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3648 /* (a/b) effect due to sign of b: mpz/signed;
3649 is -1 if a and b both negative, +1 otherwise */
3650 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3651 JACOBI_BSGN_SZ_BIT1 (b, a)
3653 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
3654 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
3655 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
3656 because this is used in a couple of places with only bit 1 of a or b
3657 known. */
3658 #define JACOBI_RECIP_UU_BIT1(a, b) \
3659 ((int) ((a) & (b)))
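/* Illustrative sketch (not the actual mpn_jacobi_base): how the BIT1 macros
   compose into a complete Jacobi symbol computation.  Sign flips accumulate
   by XOR in bit 1 of result_bit1, and a single conversion to +/-1 happens at
   the end.  demo_jacobi_uu is a hypothetical helper; b must be odd.  */
#if 0
static int
demo_jacobi_uu (unsigned a, unsigned b)  /* b odd */
{
  int result_bit1 = 0;
  while (a != 0)
    {
      while ((a & 1) == 0)
        {
          a >>= 1;
          result_bit1 ^= JACOBI_TWO_U_BIT1 (b);    /* factor of (2/b) */
        }
      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);  /* switch to (b/a) */
      {
        unsigned t = a;
        a = b % a;
        b = t;
      }
    }
  return b == 1 ? JACOBI_BIT1_TO_PN (result_bit1) : 0;  /* 0 if gcd != 1 */
}
#endif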
3661 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3662 decrementing b_size. b_low should be b_ptr[0] on entry, and will be
3663 updated for the new b_ptr. result_bit1 is updated according to the
3664 factors of 2 stripped, as per (a/2). */
3665 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
3666 do { \
3667 ASSERT ((b_size) >= 1); \
3668 ASSERT ((b_low) == (b_ptr)[0]); \
3670 while (UNLIKELY ((b_low) == 0)) \
3671 { \
3672 (b_size)--; \
3673 ASSERT ((b_size) >= 1); \
3674 (b_ptr)++; \
3675 (b_low) = *(b_ptr); \
3677 ASSERT (((a) & 1) != 0); \
3678 if ((GMP_NUMB_BITS % 2) == 1) \
3679 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
3680 } \
3681 } while (0)
3683 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3684 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and
3685 unsigned. modexact_1_odd effectively calculates -a mod b, and
3686 result_bit1 is adjusted for the factor of -1.
3688 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3689 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3690 factor to introduce into result_bit1, so for that case use mpn_mod_1
3691 unconditionally.
3693 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3694 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result,
3695 or not skip a divide step, or something. */
3697 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
3698 do { \
3699 mp_srcptr __a_ptr = (a_ptr); \
3700 mp_size_t __a_size = (a_size); \
3701 mp_limb_t __b = (b); \
3703 ASSERT (__a_size >= 1); \
3704 ASSERT (__b & 1); \
3706 if ((GMP_NUMB_BITS % 2) != 0 \
3707 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
3708 { \
3709 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
3710 } \
3711 else \
3712 { \
3713 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
3714 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
3715 } \
3716 } while (0)
3718 /* Matrix multiplication */
3719 #define mpn_matrix22_mul __MPN(matrix22_mul)
3720 __GMP_DECLSPEC void mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3721 #define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
3722 __GMP_DECLSPEC void mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3723 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
3724 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
3726 #ifndef MATRIX22_STRASSEN_THRESHOLD
3727 #define MATRIX22_STRASSEN_THRESHOLD 30
3728 #endif
3730 /* HGCD definitions */
3732 /* Extract one numb, shifting count bits left
3733     ________  ________
3734    |___xh___||___xl___|
3735          |____r____|
3736    >count <
3738 The count includes any nail bits, so it should work fine if count
3739 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
3740 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
3742 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
3743 those calls where the count high bits of xh may be non-zero.
3744 */
3746 #define MPN_EXTRACT_NUMB(count, xh, xl) \
3747 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
3748 ((xl) >> (GMP_LIMB_BITS - (count))))
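/* Illustrative use (an assumed helper, not from the sources): fetch the high
   GMP_NUMB_BITS of {xp,n} normalized so the top bit is set, with the shift
   count coming from count_leading_zeros as the comment above suggests.  */
#if 0
static mp_limb_t
demo_high_numb (mp_srcptr xp, mp_size_t n)  /* n >= 2, xp[n-1] != 0 */
{
  int cnt;
  count_leading_zeros (cnt, xp[n - 1]);
  /* MPN_EXTRACT_NUMB needs 0 < count, so an already normalized top limb
     is returned as is */
  return cnt == 0 ? xp[n - 1] : MPN_EXTRACT_NUMB (cnt, xp[n - 1], xp[n - 2]);
}
#endif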
3751 /* The matrix non-negative M = (u, u'; v,v') keeps track of the
3752 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
3753 than a, b. The determinant must always be one, so that M has an
3754 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
3755 bits. */
3757 struct hgcd_matrix1
3758 {
3759 mp_limb_t u[2][2];
3760 };
3761 #define mpn_hgcd2 __MPN (hgcd2)
3762 __GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
3764 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
3765 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3767 #define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
3768 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3770 struct hgcd_matrix
3771 {
3772 mp_size_t alloc; /* for sanity checking only */
3773 mp_size_t n;
3774 mp_ptr p[2][2];
3775 };
3777 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
3779 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
3780 __GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
3782 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
3783 __GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
3785 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
3786 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3788 #define mpn_hgcd_itch __MPN (hgcd_itch)
3789 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
3791 #define mpn_hgcd __MPN (hgcd)
3792 __GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3794 #define MPN_HGCD_LEHMER_ITCH(n) (n)
3796 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
3797 __GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3799 /* Needs storage for the quotient */
3800 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
3802 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
3803 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3805 #define MPN_GCD_LEHMER_N_ITCH(n) (n)
3807 #define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
3808 __GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3810 #define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
3811 __GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
3813 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
3815 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
3816 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3818 /* 4*(an + 1) + 4*(bn + 1) + an */
3819 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
3821 #ifndef HGCD_THRESHOLD
3822 #define HGCD_THRESHOLD 400
3823 #endif
3825 #ifndef GCD_DC_THRESHOLD
3826 #define GCD_DC_THRESHOLD 1000
3827 #endif
3829 #ifndef GCDEXT_DC_THRESHOLD
3830 #define GCDEXT_DC_THRESHOLD 600
3831 #endif
3833 /* Definitions for mpn_set_str and mpn_get_str */
3834 struct powers
3835 {
3836 mp_ptr p; /* actual power value */
3837 mp_size_t n; /* # of limbs at p */
3838 mp_size_t shift; /* weight of lowest limb, in limb base B */
3839 size_t digits_in_base; /* number of corresponding digits */
3840 int base;
3841 };
3842 typedef struct powers powers_t;
3843 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
3844 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
3845 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
3846 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
3848 #define mpn_dc_set_str __MPN(dc_set_str)
3849 __GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
3850 #define mpn_bc_set_str __MPN(bc_set_str)
3851 __GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
3852 #define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
3853 __GMP_DECLSPEC void mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
3856 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3857 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
3858 hence giving back the user's size in bits rounded up. Notice that
3859 converting prec->bits->prec gives an unchanged value. */
3860 #define __GMPF_BITS_TO_PREC(n) \
3861 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3862 #define __GMPF_PREC_TO_BITS(n) \
3863 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
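/* Worked example (assuming GMP_NUMB_BITS == 64): __GMPF_BITS_TO_PREC(100)
   = (100 + 127)/64 = 3 limbs, i.e. the 2 limbs needed for 100 bits plus the
   extra limb; __GMPF_PREC_TO_BITS(3) = 3*64 - 64 = 128, the requested 100
   bits rounded up to whole limbs; and __GMPF_BITS_TO_PREC(128) = 3 again,
   so the prec->bits->prec round trip is indeed unchanged.  */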
3865 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
3868 /* Set n to the number of significant digits an mpf of the given _mp_prec
3869 field, in the given base. This is a rounded up value, designed to ensure
3870 there's enough digits to reproduce all the guaranteed part of the value.
3872 There are prec many limbs, but the high might be only "1" so forget it
3873 and just count prec-1 limbs into chars. +1 rounds that upwards, and a
3874 further +1 is because the limbs usually won't fall on digit boundaries.
3876 FIXME: If base is a power of 2 and the bits per digit divides
3877 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for
3878 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
3880 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
3881 do { \
3882 ASSERT (base >= 2 && base < numberof (mp_bases)); \
3883 (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS) \
3884 * mp_bases[(base)].chars_per_bit_exactly); \
3885 } while (0)
3888 /* Decimal point string, from the current C locale. Needs <langinfo.h> for
3889 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
3890 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
3891 their respective #if HAVE_FOO_H.
3893 GLIBC recommends nl_langinfo because getting only one facet can be
3894 faster, apparently. */
3896 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
3897 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
3898 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
3899 #endif
3900 /* RADIXCHAR is deprecated, still in unix98 or some such. */
3901 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
3902 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
3903 #endif
3904 /* localeconv is slower since it returns all locale stuff */
3905 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
3906 #define GMP_DECIMAL_POINT (localeconv()->decimal_point)
3907 #endif
3908 #if ! defined (GMP_DECIMAL_POINT)
3909 #define GMP_DECIMAL_POINT (".")
3910 #endif
3913 #define DOPRNT_CONV_FIXED 1
3914 #define DOPRNT_CONV_SCIENTIFIC 2
3915 #define DOPRNT_CONV_GENERAL 3
3917 #define DOPRNT_JUSTIFY_NONE 0
3918 #define DOPRNT_JUSTIFY_LEFT 1
3919 #define DOPRNT_JUSTIFY_RIGHT 2
3920 #define DOPRNT_JUSTIFY_INTERNAL 3
3922 #define DOPRNT_SHOWBASE_YES 1
3923 #define DOPRNT_SHOWBASE_NO 2
3924 #define DOPRNT_SHOWBASE_NONZERO 3
3926 struct doprnt_params_t {
3927 int base; /* negative for upper case */
3928 int conv; /* choices above */
3929 const char *expfmt; /* exponent format */
3930 int exptimes4; /* exponent multiply by 4 */
3931 char fill; /* character */
3932 int justify; /* choices above */
3933 int prec; /* prec field, or -1 for all digits */
3934 int showbase; /* choices above */
3935 int showpoint; /* if radix point always shown */
3936 int showtrailing; /* if trailing zeros wanted */
3937 char sign; /* '+', ' ', or '\0' */
3938 int width; /* width field */
3939 };
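/* Illustrative settings (an assumed mapping, for exposition only): a C-style
   "%+12.6f" conversion would correspond roughly to base = 10,
   conv = DOPRNT_CONV_FIXED, prec = 6, width = 12, sign = '+', fill = ' ',
   justify = DOPRNT_JUSTIFY_RIGHT and showbase = DOPRNT_SHOWBASE_NO.  */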
3941 #if _GMP_H_HAVE_VA_LIST
3943 __GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
3944 __GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
3945 __GMP_DECLSPEC typedef int (*doprnt_reps_t) __GMP_PROTO ((void *, int, int));
3946 __GMP_DECLSPEC typedef int (*doprnt_final_t) __GMP_PROTO ((void *));
3948 struct doprnt_funs_t {
3949 doprnt_format_t format;
3950 doprnt_memory_t memory;
3951 doprnt_reps_t reps;
3952 doprnt_final_t final; /* NULL if not required */
3953 };
3955 extern const struct doprnt_funs_t __gmp_fprintf_funs;
3956 extern const struct doprnt_funs_t __gmp_sprintf_funs;
3957 extern const struct doprnt_funs_t __gmp_snprintf_funs;
3958 extern const struct doprnt_funs_t __gmp_obstack_printf_funs;
3959 extern const struct doprnt_funs_t __gmp_ostream_funs;
3961 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first
3962 "size" of these have been written. "alloc > size" is maintained, so
3963 there's room to store a '\0' at the end. "result" is where the
3964 application wants the final block pointer. */
3965 struct gmp_asprintf_t {
3966 char **result;
3967 char *buf;
3968 size_t size;
3969 size_t alloc;
3970 };
3972 #define GMP_ASPRINTF_T_INIT(d, output) \
3973 do { \
3974 (d).result = (output); \
3975 (d).alloc = 256; \
3976 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
3977 (d).size = 0; \
3978 } while (0)
3980 /* If a realloc is necessary, use twice the size actually required, so as to
3981 avoid repeated small reallocs. */
3982 #define GMP_ASPRINTF_T_NEED(d, n) \
3983 do { \
3984 size_t alloc, newsize, newalloc; \
3985 ASSERT ((d)->alloc >= (d)->size + 1); \
3987 alloc = (d)->alloc; \
3988 newsize = (d)->size + (n); \
3989 if (alloc <= newsize) \
3990 { \
3991 newalloc = 2*newsize; \
3992 (d)->alloc = newalloc; \
3993 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
3994 alloc, newalloc, char); \
3995 } \
3996 } while (0)
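/* Illustrative append helper (a hypothetical sketch mirroring what
   __gmp_asprintf_memory must do, assuming <string.h> is available): make
   room for n more bytes, copy them in, and advance size.  The "alloc > size"
   invariant keeps one spare byte for the final '\0'.  */
#if 0
static void
demo_asprintf_append (struct gmp_asprintf_t *d, const char *p, size_t n)
{
  GMP_ASPRINTF_T_NEED (d, n);
  memcpy (d->buf + d->size, p, n);
  d->size += n;
}
#endif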
3998 __GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
3999 __GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
4000 __GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
4002 /* buf is where to write the next output, and size is how much space is left
4003 there. If the application passed size==0 then that's what we'll have
4004 here, and nothing at all should be written. */
4005 struct gmp_snprintf_t {
4006 char *buf;
4007 size_t size;
4008 };
4010 /* Add the bytes printed by the call to the total retval, or bail out on an
4011 error. */
4012 #define DOPRNT_ACCUMULATE(call) \
4013 do { \
4014 int __ret; \
4015 __ret = call; \
4016 if (__ret == -1) \
4017 goto error; \
4018 retval += __ret; \
4019 } while (0)
4020 #define DOPRNT_ACCUMULATE_FUN(fun, params) \
4021 do { \
4022 ASSERT ((fun) != NULL); \
4023 DOPRNT_ACCUMULATE ((*(fun)) params); \
4024 } while (0)
4026 #define DOPRNT_FORMAT(fmt, ap) \
4027 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
4028 #define DOPRNT_MEMORY(ptr, len) \
4029 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
4030 #define DOPRNT_REPS(c, n) \
4031 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
4033 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
4035 #define DOPRNT_REPS_MAYBE(c, n) \
4036 do { \
4037 if ((n) != 0) \
4038 DOPRNT_REPS (c, n); \
4039 } while (0)
4040 #define DOPRNT_MEMORY_MAYBE(ptr, len) \
4041 do { \
4042 if ((len) != 0) \
4043 DOPRNT_MEMORY (ptr, len); \
4044 } while (0)
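/* Illustrative caller shape (an assumed example): the DOPRNT_* macros expect
   "funs", "data" and a "retval" accumulator in scope, plus an "error:" label
   to bail out to, which is exactly the shape of __gmp_doprnt itself.
   Assumes <string.h> for strlen.  */
#if 0
static int
demo_emit_line (const struct doprnt_funs_t *funs, void *data, const char *s)
{
  int retval = 0;
  DOPRNT_STRING (s);            /* funs->memory (data, s, strlen(s)) */
  DOPRNT_REPS_MAYBE ('\n', 1);  /* funs->reps (data, '\n', 1) */
  return retval;
 error:
  return -1;
}
#endif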
4046 __GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
4047 __GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
4049 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
4050 __GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
4052 __GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
4053 #endif /* _GMP_H_HAVE_VA_LIST */
4056 typedef int (*gmp_doscan_scan_t) __GMP_PROTO ((void *, const char *, ...));
4057 typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
4058 typedef int (*gmp_doscan_get_t) __GMP_PROTO ((void *));
4059 typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
4061 struct gmp_doscan_funs_t {
4062 gmp_doscan_scan_t scan;
4063 gmp_doscan_step_t step;
4064 gmp_doscan_get_t get;
4065 gmp_doscan_unget_t unget;
4066 };
4067 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs;
4068 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
4070 #if _GMP_H_HAVE_VA_LIST
4071 __GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list));
4072 #endif
4075 /* For testing and debugging. */
4076 #define MPZ_CHECK_FORMAT(z) \
4077 do { \
4078 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
4079 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
4080 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
4081 } while (0)
4083 #define MPQ_CHECK_FORMAT(q) \
4084 do { \
4085 MPZ_CHECK_FORMAT (mpq_numref (q)); \
4086 MPZ_CHECK_FORMAT (mpq_denref (q)); \
4087 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
4089 if (SIZ(mpq_numref(q)) == 0) \
4090 { \
4091 /* should have zero as 0/1 */ \
4092 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
4093 && PTR(mpq_denref(q))[0] == 1); \
4094 } \
4095 else \
4096 { \
4097 /* should have no common factors */ \
4098 mpz_t g; \
4099 mpz_init (g); \
4100 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
4101 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
4102 mpz_clear (g); \
4103 } \
4104 } while (0)
4106 #define MPF_CHECK_FORMAT(f) \
4107 do { \
4108 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
4109 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
4110 if (SIZ(f) == 0) \
4111 ASSERT_ALWAYS (EXP(f) == 0); \
4112 if (SIZ(f) != 0) \
4113 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
4114 } while (0)
4117 #define MPZ_PROVOKE_REALLOC(z) \
4118 do { ALLOC(z) = ABSIZ(z); } while (0)
4121 /* Enhancement: The "mod" and "gcd_1" functions below could have
4122 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
4123 function pointers, only actual functions. It probably doesn't make much
4124 difference to the gmp code, since hopefully we arrange calls so there's
4125 no great need for the compiler to move things around. */
4127 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
4128 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
4129 in mpn/x86/x86-defs.m4. Be sure to update that when changing here. */
4130 struct cpuvec_t {
4131 DECL_add_n ((*add_n));
4132 DECL_addmul_1 ((*addmul_1));
4133 DECL_copyd ((*copyd));
4134 DECL_copyi ((*copyi));
4135 DECL_divexact_1 ((*divexact_1));
4136 DECL_divexact_by3c ((*divexact_by3c));
4137 DECL_divrem_1 ((*divrem_1));
4138 DECL_gcd_1 ((*gcd_1));
4139 DECL_lshift ((*lshift));
4140 DECL_mod_1 ((*mod_1));
4141 DECL_mod_34lsub1 ((*mod_34lsub1));
4142 DECL_modexact_1c_odd ((*modexact_1c_odd));
4143 DECL_mul_1 ((*mul_1));
4144 DECL_mul_basecase ((*mul_basecase));
4145 DECL_preinv_divrem_1 ((*preinv_divrem_1));
4146 DECL_preinv_mod_1 ((*preinv_mod_1));
4147 DECL_rshift ((*rshift));
4148 DECL_sqr_basecase ((*sqr_basecase));
4149 DECL_sub_n ((*sub_n));
4150 DECL_submul_1 ((*submul_1));
4151 int initialized;
4152 mp_size_t mul_toom22_threshold;
4153 mp_size_t mul_toom33_threshold;
4154 mp_size_t sqr_toom2_threshold;
4155 mp_size_t sqr_toom3_threshold;
4156 };
4157 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
4158 #endif /* x86 fat binary */
4160 __GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void));
4162 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
4163 if that hasn't yet been done (to establish the right values). */
4164 #define CPUVEC_THRESHOLD(field) \
4165 ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
4166 __gmpn_cpuvec.field)
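/* Illustrative use (a hypothetical helper): code compiled into a fat binary
   reads its cutoffs through CPUVEC_THRESHOLD, so the values tuned for the
   CPU detected at runtime are the ones actually used.  */
#if 0
static int
demo_below_toom22 (mp_size_t n)
{
  return n < CPUVEC_THRESHOLD (mul_toom22_threshold);
}
#endif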
4169 #if HAVE_NATIVE_mpn_add_nc
4170 #define mpn_add_nc __MPN(add_nc)
4171 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4172 #else
4174 static inline mp_limb_t
4175 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4176 {
4177 mp_limb_t co;
4178 co = mpn_add_n (rp, up, vp, n);
4179 co += mpn_add_1 (rp, rp, n, ci);
4180 return co;
4181 }
4182 #endif
4184 #if HAVE_NATIVE_mpn_sub_nc
4185 #define mpn_sub_nc __MPN(sub_nc)
4186 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4187 #else
4188 static inline mp_limb_t
4189 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4190 {
4191 mp_limb_t co;
4192 co = mpn_sub_n (rp, up, vp, n);
4193 co += mpn_sub_1 (rp, rp, n, ci);
4194 return co;
4195 }
4196 #endif
4198 static inline int
4199 mpn_zero_p (mp_srcptr ap, mp_size_t n)
4200 {
4201 mp_size_t i;
4202 for (i = n - 1; i >= 0; i--)
4203 {
4204 if (ap[i] != 0)
4205 return 0;
4206 }
4207 return 1;
4208 }
4210 #if TUNE_PROGRAM_BUILD
4211 /* Some extras wanted when recompiling some .c files for use by the tune
4212 program. Not part of a normal build.
4214 It's necessary to keep these thresholds as #defines (just to an
4215 identically named variable), since various defaults are established based
4216 on #ifdef in the .c files. For some this is not so (the defaults are
4217 instead established above), but all are done this way for consistency. */
4219 #undef MUL_TOOM22_THRESHOLD
4220 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold
4221 extern mp_size_t mul_toom22_threshold;
4223 #undef MUL_TOOM33_THRESHOLD
4224 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold
4225 extern mp_size_t mul_toom33_threshold;
4227 #undef MUL_TOOM44_THRESHOLD
4228 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold
4229 extern mp_size_t mul_toom44_threshold;
4231 #undef MUL_TOOM6H_THRESHOLD
4232 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
4233 extern mp_size_t mul_toom6h_threshold;
4235 #undef MUL_TOOM8H_THRESHOLD
4236 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
4237 extern mp_size_t mul_toom8h_threshold;
4239 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD
4240 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
4241 extern mp_size_t mul_toom32_to_toom43_threshold;
4243 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD
4244 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
4245 extern mp_size_t mul_toom32_to_toom53_threshold;
4247 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD
4248 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
4249 extern mp_size_t mul_toom42_to_toom53_threshold;
4251 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD
4252 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
4253 extern mp_size_t mul_toom42_to_toom63_threshold;
4255 #undef MUL_FFT_THRESHOLD
4256 #define MUL_FFT_THRESHOLD mul_fft_threshold
4257 extern mp_size_t mul_fft_threshold;
4259 #undef MUL_FFT_MODF_THRESHOLD
4260 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
4261 extern mp_size_t mul_fft_modf_threshold;
4263 #undef MUL_FFT_TABLE
4264 #define MUL_FFT_TABLE { 0 }
4266 #undef MUL_FFT_TABLE3
4267 #define MUL_FFT_TABLE3 { {0,0} }
4269 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4270 remain as zero (always use it). */
4271 #if ! HAVE_NATIVE_mpn_sqr_basecase
4272 #undef SQR_BASECASE_THRESHOLD
4273 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
4274 extern mp_size_t sqr_basecase_threshold;
4275 #endif
4277 #if TUNE_PROGRAM_BUILD_SQR
4278 #undef SQR_TOOM2_THRESHOLD
4279 #define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC
4280 #else
4281 #undef SQR_TOOM2_THRESHOLD
4282 #define SQR_TOOM2_THRESHOLD sqr_toom2_threshold
4283 extern mp_size_t sqr_toom2_threshold;
4284 #endif
4286 #undef SQR_TOOM3_THRESHOLD
4287 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
4288 extern mp_size_t sqr_toom3_threshold;
4290 #undef SQR_TOOM4_THRESHOLD
4291 #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
4292 extern mp_size_t sqr_toom4_threshold;
4294 #undef SQR_TOOM6_THRESHOLD
4295 #define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
4296 extern mp_size_t sqr_toom6_threshold;
4298 #undef SQR_TOOM8_THRESHOLD
4299 #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
4300 extern mp_size_t sqr_toom8_threshold;
4302 #undef SQR_FFT_THRESHOLD
4303 #define SQR_FFT_THRESHOLD sqr_fft_threshold
4304 extern mp_size_t sqr_fft_threshold;
4306 #undef SQR_FFT_MODF_THRESHOLD
4307 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
4308 extern mp_size_t sqr_fft_modf_threshold;
4310 #undef SQR_FFT_TABLE
4311 #define SQR_FFT_TABLE { 0 }
4313 #undef SQR_FFT_TABLE3
4314 #define SQR_FFT_TABLE3 { {0,0} }
4316 #undef MULLO_BASECASE_THRESHOLD
4317 #define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
4318 extern mp_size_t mullo_basecase_threshold;
4320 #undef MULLO_DC_THRESHOLD
4321 #define MULLO_DC_THRESHOLD mullo_dc_threshold
4322 extern mp_size_t mullo_dc_threshold;
4324 #undef MULLO_MUL_N_THRESHOLD
4325 #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
4326 extern mp_size_t mullo_mul_n_threshold;
4328 #undef DC_DIV_QR_THRESHOLD
4329 #define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
4330 extern mp_size_t dc_div_qr_threshold;
4332 #undef DC_DIVAPPR_Q_THRESHOLD
4333 #define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
4334 extern mp_size_t dc_divappr_q_threshold;
4336 #undef DC_BDIV_Q_THRESHOLD
4337 #define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
4338 extern mp_size_t dc_bdiv_q_threshold;
4340 #undef DC_BDIV_QR_THRESHOLD
4341 #define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
4342 extern mp_size_t dc_bdiv_qr_threshold;
4344 #undef MU_DIV_QR_THRESHOLD
4345 #define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
4346 extern mp_size_t mu_div_qr_threshold;
4348 #undef MU_DIVAPPR_Q_THRESHOLD
4349 #define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
4350 extern mp_size_t mu_divappr_q_threshold;
4352 #undef MUPI_DIV_QR_THRESHOLD
4353 #define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
4354 extern mp_size_t mupi_div_qr_threshold;
4356 #undef MU_BDIV_QR_THRESHOLD
4357 #define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
4358 extern mp_size_t mu_bdiv_qr_threshold;
4360 #undef MU_BDIV_Q_THRESHOLD
4361 #define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
4362 extern mp_size_t mu_bdiv_q_threshold;
4364 #undef INV_MULMOD_BNM1_THRESHOLD
4365 #define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
4366 extern mp_size_t inv_mulmod_bnm1_threshold;
4368 #undef INV_NEWTON_THRESHOLD
4369 #define INV_NEWTON_THRESHOLD inv_newton_threshold
4370 extern mp_size_t inv_newton_threshold;
4372 #undef INV_APPR_THRESHOLD
4373 #define INV_APPR_THRESHOLD inv_appr_threshold
4374 extern mp_size_t inv_appr_threshold;
4376 #undef BINV_NEWTON_THRESHOLD
4377 #define BINV_NEWTON_THRESHOLD binv_newton_threshold
4378 extern mp_size_t binv_newton_threshold;
4380 #undef REDC_1_TO_REDC_2_THRESHOLD
4381 #define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
4382 extern mp_size_t redc_1_to_redc_2_threshold;
4384 #undef REDC_2_TO_REDC_N_THRESHOLD
4385 #define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
4386 extern mp_size_t redc_2_to_redc_n_threshold;
4388 #undef REDC_1_TO_REDC_N_THRESHOLD
4389 #define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
4390 extern mp_size_t redc_1_to_redc_n_threshold;
4392 #undef MATRIX22_STRASSEN_THRESHOLD
4393 #define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
4394 extern mp_size_t matrix22_strassen_threshold;
4396 #undef HGCD_THRESHOLD
4397 #define HGCD_THRESHOLD hgcd_threshold
4398 extern mp_size_t hgcd_threshold;
4400 #undef GCD_DC_THRESHOLD
4401 #define GCD_DC_THRESHOLD gcd_dc_threshold
4402 extern mp_size_t gcd_dc_threshold;
4404 #undef GCDEXT_DC_THRESHOLD
4405 #define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
4406 extern mp_size_t gcdext_dc_threshold;
4408 #undef DIVREM_1_NORM_THRESHOLD
4409 #define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
4410 extern mp_size_t divrem_1_norm_threshold;
4412 #undef DIVREM_1_UNNORM_THRESHOLD
4413 #define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
4414 extern mp_size_t divrem_1_unnorm_threshold;
4416 #undef MOD_1_NORM_THRESHOLD
4417 #define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
4418 extern mp_size_t mod_1_norm_threshold;
4420 #undef MOD_1_UNNORM_THRESHOLD
4421 #define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
4422 extern mp_size_t mod_1_unnorm_threshold;
4424 #undef MOD_1N_TO_MOD_1_1_THRESHOLD
4425 #define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
4426 extern mp_size_t mod_1n_to_mod_1_1_threshold;
4428 #undef MOD_1U_TO_MOD_1_1_THRESHOLD
4429 #define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
4430 extern mp_size_t mod_1u_to_mod_1_1_threshold;
4432 #undef MOD_1_1_TO_MOD_1_2_THRESHOLD
4433 #define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
4434 extern mp_size_t mod_1_1_to_mod_1_2_threshold;
4436 #undef MOD_1_2_TO_MOD_1_4_THRESHOLD
4437 #define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
4438 extern mp_size_t mod_1_2_to_mod_1_4_threshold;
4440 #undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
4441 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
4442 extern mp_size_t preinv_mod_1_to_mod_1_threshold;
4444 #if ! UDIV_PREINV_ALWAYS
4445 #undef DIVREM_2_THRESHOLD
4446 #define DIVREM_2_THRESHOLD divrem_2_threshold
4447 extern mp_size_t divrem_2_threshold;
4448 #endif
4450 #undef MULMOD_BNM1_THRESHOLD
4451 #define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
4452 extern mp_size_t mulmod_bnm1_threshold;
4454 #undef SQRMOD_BNM1_THRESHOLD
4455 #define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
4456 extern mp_size_t sqrmod_bnm1_threshold;
4458 #undef GET_STR_DC_THRESHOLD
4459 #define GET_STR_DC_THRESHOLD get_str_dc_threshold
4460 extern mp_size_t get_str_dc_threshold;
4462 #undef GET_STR_PRECOMPUTE_THRESHOLD
4463 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
4464 extern mp_size_t get_str_precompute_threshold;
4466 #undef SET_STR_DC_THRESHOLD
4467 #define SET_STR_DC_THRESHOLD set_str_dc_threshold
4468 extern mp_size_t set_str_dc_threshold;
4470 #undef SET_STR_PRECOMPUTE_THRESHOLD
4471 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
4472 extern mp_size_t set_str_precompute_threshold;
4474 #undef FFT_TABLE_ATTRS
4475 #define FFT_TABLE_ATTRS
4476 extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
4477 #define FFT_TABLE3_SIZE 2000 /* generous space for tuning */
4478 extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
4480 /* Sizes the tune program tests up to, used in a couple of recompilations. */
4481 #undef MUL_TOOM22_THRESHOLD_LIMIT
4482 #undef MUL_TOOM33_THRESHOLD_LIMIT
4483 #undef MULLO_BASECASE_THRESHOLD_LIMIT
4484 #undef SQR_TOOM3_THRESHOLD_LIMIT
4485 #define SQR_TOOM2_MAX_GENERIC 200
4486 #define MUL_TOOM22_THRESHOLD_LIMIT 700
4487 #define MUL_TOOM33_THRESHOLD_LIMIT 700
4488 #define SQR_TOOM3_THRESHOLD_LIMIT 400
4489 #define MUL_TOOM44_THRESHOLD_LIMIT 1000
4490 #define SQR_TOOM4_THRESHOLD_LIMIT 1000
4491 #define MUL_TOOM6H_THRESHOLD_LIMIT 1100
4492 #define SQR_TOOM6_THRESHOLD_LIMIT 1100
4493 #define MUL_TOOM8H_THRESHOLD_LIMIT 1200
4494 #define SQR_TOOM8_THRESHOLD_LIMIT 1200
4495 #define MULLO_BASECASE_THRESHOLD_LIMIT 200
4496 #define GET_STR_THRESHOLD_LIMIT 150
4498 #endif /* TUNE_PROGRAM_BUILD */
4500 #if defined (__cplusplus)
4501 }
4502 #endif
4504 /* FIXME: Make these itch functions less conservative. Also consider making
4505 them dependent on just 'an', and compute the allocation directly from 'an'
4506 instead of via n. */
4508 /* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
4509 k is the smallest k such that
4510 ceil(an/2^k) < MUL_TOOM22_THRESHOLD.
4511 which implies that
4512 k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
4513 = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
4514 */
4515 #define mpn_toom22_mul_itch(an, bn) \
4516 (2 * ((an) + GMP_NUMB_BITS))
4517 #define mpn_toom2_sqr_itch(an) \
4518 (2 * ((an) + GMP_NUMB_BITS))
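/* Worked example (assuming GMP_NUMB_BITS == 64 and MUL_TOOM22_THRESHOLD ==
   30): for an = 1000, floor(999/29) = 34, whose bitsize is 6, so the exact
   need is 2*(1000 + 6) = 2012 limbs; the macro's conservative bound allows
   2*(1000 + 64) = 2128, trading a few limbs for a formula with no log.  */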
4520 /* Can probably be trimmed to 2 an + O(log an). */
4521 #define mpn_toom33_mul_itch(an, bn) \
4522 ((5 * (an) >> 1) + GMP_NUMB_BITS)
4523 #define mpn_toom3_sqr_itch(an) \
4524 ((5 * (an) >> 1) + GMP_NUMB_BITS)
4526 #define mpn_toom44_mul_itch(an, bn) \
4527 (3 * (an) + GMP_NUMB_BITS)
4528 #define mpn_toom4_sqr_itch(an) \
4529 (3 * (an) + GMP_NUMB_BITS)
4531 #define mpn_toom6_sqr_itch(n) \
4532 ( ((n) - SQR_TOOM6_THRESHOLD)*2 + \
4533 MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
4534 mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) )
4536 #define mpn_toom6_mul_n_itch(n) \
4537 ( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
4538 MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
4539 mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
4541 static inline mp_size_t
4542 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
4543 mp_size_t estimatedN;
4544 estimatedN = (an + bn) / (size_t) 10 + 1;
4545 return mpn_toom6_mul_n_itch (estimatedN * 6);
4546 }
4548 #define mpn_toom8_sqr_itch(n) \
4549 ( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
4550 MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
4551 mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) )
4553 #define mpn_toom8_mul_n_itch(n) \
4554 ( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) + \
4555 MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
4556 mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) )
4558 static inline mp_size_t
4559 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
4560 mp_size_t estimatedN;
4561 estimatedN = (an + bn) / (size_t) 14 + 1;
4562 return mpn_toom8_mul_n_itch (estimatedN * 8);
4563 }
4565 static inline mp_size_t
4566 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
4567 {
4568 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
4569 mp_size_t itch = 2 * n + 1;
4571 return itch;
4572 }
4574 static inline mp_size_t
4575 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
4576 {
4577 mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
4578 return 6 * n + 3;
4579 }
4581 static inline mp_size_t
4582 mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
4583 {
4584 mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
4586 return 6*n + 4;
4587 }
4589 static inline mp_size_t
4590 mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
4591 {
4592 mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
4593 return 6*n + 4;
4594 }
4596 static inline mp_size_t
4597 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
4598 {
4599 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
4600 return 10 * n + 10;
4601 }
4603 static inline mp_size_t
4604 mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
4605 {
4606 mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
4607 return 10 * n + 10;
4608 }
4610 static inline mp_size_t
4611 mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
4612 {
4613 mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
4614 return 9 * n + 3;
4615 }
4617 #if WANT_FFT
4618 #define mpn_fft_mul mpn_mul_fft_full
4619 #else
4620 #define mpn_fft_mul mpn_nussbaumer_mul
4621 #endif
4625 /* A little helper for a null-terminated __gmp_allocate_func string.
4626 The destructor ensures it's freed even if an exception is thrown.
4627 The len field is needed by the destructor, and can be used by anyone else
4628 to avoid a second strlen pass over the data.
4630 Since our input is a C string, using strlen is correct. Perhaps it'd be
4631 more C++-ish style to use std::char_traits<char>::length, but char_traits
4632 isn't available in gcc 2.95.4. */
4634 class gmp_allocated_string {
4635 public:
4636 char *str;
4637 size_t len;
4638 gmp_allocated_string(char *arg)
4639 {
4640 str = arg;
4641 len = std::strlen (str);
4642 }
4643 ~gmp_allocated_string()
4644 {
4645 (*__gmp_free_func) (str, len+1);
4646 }
4647 };
4649 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
4650 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
4651 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
4652 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
4653 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
4654 extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
4656 #endif /* __cplusplus */
4658 #endif /* __GMP_IMPL_H__ */