gmp-impl.h

   1 /* Include file for internal GNU MP types and definitions.
   2
   3    THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   4    BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
   5
   6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
   7 2004, 2005, 2006 Free Software Foundation, Inc.
   8
   9 This file is part of the GNU MP Library.
  10
  11 The GNU MP Library is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU Lesser General Public License as published by
  13 the Free Software Foundation; either version 2.1 of the License, or (at your
  14 option) any later version.
  15
  16 The GNU MP Library is distributed in the hope that it will be useful, but
  17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  18 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  19 License for more details.
  20
  21 You should have received a copy of the GNU Lesser General Public License
  22 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
  23 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  24 MA 02110-1301, USA. */
  25
  26
  27 /* __GMP_DECLSPEC must be given on any global data that will be accessed
  28    from outside libgmp, meaning from the test or development programs, or
  29    from libgmpxx.  Failing to do this will result in an incorrect address
  30    being used for the accesses.  On functions __GMP_DECLSPEC makes calls
  31    from outside libgmp more efficient, but they'll still work fine without
  32    it.  */
  33
  34
  35 #ifndef __GMP_IMPL_H__
  36 #define __GMP_IMPL_H__
  37
  38 #if defined _CRAY
  39 #include <intrinsics.h>  /* for _popcnt */
  40 #endif
  41
  42 /* limits.h is not used in general, since it's an ANSI-ism, and since on
  43    solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
  44    values (the ABI=64 values).
  45
  46    On Cray vector systems, however, we need the system limits.h since sizes
  47    of signed and unsigned types can differ there, depending on compiler
  48    options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
  49    reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
  50    short can be 24, 32, 46 or 64 bits, and different for ushort.  */
  51
  52 #if defined _CRAY
  53 #include <limits.h>
  54 #endif
  55
  56 /* For fat.h and other fat binary stuff.
  57    No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
  58    declared this way are only used to set function pointers in __gmp_cpuvec,
  59    they're not called directly.  */
  60 #define DECL_add_n(name) \
  61   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
  62 #define DECL_addmul_1(name) \
  63   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  64 #define DECL_copyd(name) \
  65   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
  66 #define DECL_copyi(name) \
  67   DECL_copyd (name)
  68 #define DECL_divexact_1(name) \
  69   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  70 #define DECL_divexact_by3c(name) \
  71   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
  72 #define DECL_divrem_1(name) \
  73   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
  74 #define DECL_gcd_1(name) \
  75   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
  76 #define DECL_lshift(name) \
  77   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
  78 #define DECL_mod_1(name) \
  79   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
  80 #define DECL_mod_34lsub1(name) \
  81   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
  82 #define DECL_modexact_1c_odd(name) \
  83   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
  84 #define DECL_mul_1(name) \
  85   DECL_addmul_1 (name)
  86 #define DECL_mul_basecase(name) \
  87   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
  88 #define DECL_preinv_divrem_1(name) \
  89   mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
  90 #define DECL_preinv_mod_1(name) \
  91   mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
  92 #define DECL_rshift(name) \
  93   DECL_lshift (name)
  94 #define DECL_sqr_basecase(name) \
  95   void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
  96 #define DECL_sub_n(name) \
  97   DECL_add_n (name)
  98 #define DECL_submul_1(name) \
  99   DECL_addmul_1 (name)
 100
 101 #if ! __GMP_WITHIN_CONFIGURE
 102 #include "config.h"
 103 #include "gmp-mparam.h"
 104 #include "fib_table.h"
 105 #include "mp_bases.h"
 106 #if WANT_FAT_BINARY
 107 #include "fat.h"
 108 #endif
 109 #endif
 110
 111 #if HAVE_INTTYPES_H      /* for uint_least32_t */
 112 # include <inttypes.h>
 113 #else
 114 # if HAVE_STDINT_H
 115 #  include <stdint.h>
 116 # endif
 117 #endif
 118
 119 #ifdef __cplusplus
 120 #include <cstring>  /* for strlen */
 121 #include <string>   /* for std::string */
 122 #endif
 123
 124
 125 #ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
 126 #define WANT_TMP_DEBUG 0
 127 #endif
 128
 129
 130 /* Might search and replace _PROTO to __GMP_PROTO internally one day, to
 131    avoid two names for one thing, but no hurry for that.  */
 132 #define _PROTO(x)  __GMP_PROTO(x)
 133
 134 /* The following tries to get a good version of alloca.  The tests are
 135    adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
 136    Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
 137    be setup appropriately.
 138
 139    ifndef alloca - a cpp define might already exist.
 140        glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
 141        HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
 142
 143    GCC __builtin_alloca - preferred whenever available.
 144
 145    _AIX pragma - IBM compilers need a #pragma in "each module that needs to
 146        use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
 147        used in past versions of GMP, retained still in case it matters.
 148
 149        The autoconf manual says this pragma needs to be at the start of a C
 150        file, apart from comments and preprocessor directives.  Is that true?
 151        xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
 152        from gmp.h.
 153 */
 154
 155 #ifndef alloca
 156 # ifdef __GNUC__
 157 #  define alloca __builtin_alloca
 158 # else
 159 #  ifdef __DECC
 160 #   define alloca(x) __ALLOCA(x)
 161 #  else
 162 #   ifdef _MSC_VER
 163 #    include <malloc.h>
 164 #    define alloca _alloca
 165 #   else
 166 #    if HAVE_ALLOCA_H
 167 #     include <alloca.h>
 168 #    else
 169 #     if defined (_AIX) || defined (_IBMR2)
 170  #pragma alloca
 171 #     else
 172        char *alloca ();
 173 #     endif
 174 #    endif
 175 #   endif
 176 #  endif
 177 # endif
 178 #endif
 179
 180
 181 /* if not provided by gmp-mparam.h */
 182 #ifndef BYTES_PER_MP_LIMB
 183 #define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
 184 #endif
 185 #ifndef BITS_PER_MP_LIMB
 186 #define BITS_PER_MP_LIMB  (8 * SIZEOF_MP_LIMB_T)
 187 #endif
 188
 189 #define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
 190
 191
 192 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
 193 #if HAVE_UINT_LEAST32_T
 194 typedef uint_least32_t      gmp_uint_least32_t;
 195 #else
 196 #if SIZEOF_UNSIGNED_SHORT >= 4
 197 typedef unsigned short      gmp_uint_least32_t;
 198 #else
 199 #if SIZEOF_UNSIGNED >= 4
 200 typedef unsigned            gmp_uint_least32_t;
 201 #else
 202 typedef unsigned long       gmp_uint_least32_t;
 203 #endif
 204 #endif
 205 #endif
 206
 207
 208 /* const and signed must match __gmp_const and __gmp_signed, so follow the
 209    decision made for those in gmp.h.    */
 210 #if ! __GMP_HAVE_CONST
 211 #define const   /* empty */
 212 #define signed  /* empty */
 213 #endif
 214
 215 /* "const" basically means a function does nothing but examine its arguments
 216    and give a return value, it doesn't read or write any memory (neither
 217    global nor pointed to by arguments), and has no other side-effects.  This
 218    is more restrictive than "pure".  See info node "(gcc)Function
 219    Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
 220    this off when trying to write timing loops.  */
 221 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
 222 #define ATTRIBUTE_CONST  __attribute__ ((const))
 223 #else
 224 #define ATTRIBUTE_CONST
 225 #endif
 226
 227 #if HAVE_ATTRIBUTE_NORETURN
 228 #define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
 229 #else
 230 #define ATTRIBUTE_NORETURN
 231 #endif
 232
 233 /* "malloc" means a function behaves like malloc in that the pointer it
 234    returns doesn't alias anything.  */
 235 #if HAVE_ATTRIBUTE_MALLOC
 236 #define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
 237 #else
 238 #define ATTRIBUTE_MALLOC
 239 #endif
 240
 241
 242 #if ! HAVE_STRCHR
 243 #define strchr(s,c)  index(s,c)
 244 #endif
 245
 246 #if ! HAVE_MEMSET
 247 #define memset(p, c, n)                 \
 248   do {                                  \
 249     ASSERT ((n) >= 0);                  \
 250     char *__memset__p = (p);            \
 251     int  __i;                           \
 252     for (__i = 0; __i < (n); __i++)     \
 253       __memset__p[__i] = (c);           \
 254   } while (0)
 255 #endif
 256
 257 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
 258    mode.  Falling back to a memcpy will give maximum portability, since it
 259    works no matter whether va_list is a pointer, struct or array.  */
 260 #if ! defined (va_copy) && defined (__va_copy)
 261 #define va_copy(dst,src)  __va_copy(dst,src)
 262 #endif
 263 #if ! defined (va_copy)
 264 #define va_copy(dst,src) \
 265   do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
 266 #endif
 267
 268
 269 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
 270    (ie. ctlz, ctpop, cttz).  */
 271 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
 272   || HAVE_HOST_CPU_alphaev7
 273 #define HAVE_HOST_CPU_alpha_CIX 1
 274 #endif
 275
 276
 277 #if defined (__cplusplus)
 278 extern "C" {
 279 #endif
 280
 281
 282 /* Usage: TMP_DECL;
 283           TMP_MARK;
 284           ptr = TMP_ALLOC (bytes);
 285           TMP_FREE;
 286
 287    Small allocations should use TMP_SALLOC, big allocations should use
 288    TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.
 289
 290    Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
 291    TMP_SFREE.
 292
 293    TMP_DECL just declares a variable, but might be empty and so must be last
 294    in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
 295    TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
 296    TMP_MARK was made, but then no TMP_ALLOCs.  */
 297
 298 /* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
 299    __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
 300    isn't used for "double"s, so that's not in the union.  */
 301 union tmp_align_t {
 302   mp_limb_t  l;
 303   char       *p;
 304 };
 305 #define __TMP_ALIGN  sizeof (union tmp_align_t)
 306
 307 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
 308    "a" must be an unsigned type.
 309    This is designed for use with a compile-time constant "m".
 310    The POW2 case is expected to be usual, and gcc 3.0 and up recognises
 311    "(-(8*n))%8" or the like is always zero, which means the rounding up in
 312    the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
 313 #define ROUND_UP_MULTIPLE(a,m)          \
 314   (POW2_P(m) ? (a) + (-(a))%(m)         \
 315    : (a)+(m)-1 - (((a)+(m)-1) % (m)))
 316
 317 #if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
 318 struct tmp_reentrant_t {
 319   struct tmp_reentrant_t  *next;
 320   size_t                  size;   /* bytes, including header */
 321 };
 322 void *__gmp_tmp_reentrant_alloc _PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
 323 void  __gmp_tmp_reentrant_free _PROTO ((struct tmp_reentrant_t *));
 324 #endif
 325
 326 #if WANT_TMP_ALLOCA
 327 #define TMP_SDECL
 328 #define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
 329 #define TMP_SMARK
 330 #define TMP_MARK                __tmp_marker = 0
 331 #define TMP_SALLOC(n)           alloca(n)
 332 #define TMP_BALLOC(n)           __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
 333 #define TMP_ALLOC(n)                                                    \
 334   (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
 335 #define TMP_SFREE
 336 #define TMP_FREE                                                           \
 337   do {                                                                     \
 338     if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
 339   } while (0)
 340 #endif
 341
 342 #if WANT_TMP_REENTRANT
 343 #define TMP_SDECL               TMP_DECL
 344 #define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
 345 #define TMP_SMARK               TMP_MARK
 346 #define TMP_MARK                __tmp_marker = 0
 347 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 348 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 349 #define TMP_ALLOC(n)            __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
 350 #define TMP_SFREE               TMP_FREE
 351 #define TMP_FREE                __gmp_tmp_reentrant_free (__tmp_marker)
 352 #endif
 353
 354 #if WANT_TMP_NOTREENTRANT
 355 struct tmp_marker
 356 {
 357   struct tmp_stack *which_chunk;
 358   void *alloc_point;
 359 };
 360 void *__gmp_tmp_alloc _PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
 361 void __gmp_tmp_mark _PROTO ((struct tmp_marker *));
 362 void __gmp_tmp_free _PROTO ((struct tmp_marker *));
 363 #define TMP_SDECL               TMP_DECL
 364 #define TMP_DECL                struct tmp_marker __tmp_marker
 365 #define TMP_SMARK               TMP_MARK
 366 #define TMP_MARK                __gmp_tmp_mark (&__tmp_marker)
 367 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 368 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 369 #define TMP_ALLOC(n)                                                    \
 370   __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
 371 #define TMP_SFREE               TMP_FREE
 372 #define TMP_FREE                __gmp_tmp_free (&__tmp_marker)
 373 #endif
 374
 375 #if WANT_TMP_DEBUG
 376 /* See tal-debug.c for some comments. */
 377 struct tmp_debug_t {
 378   struct tmp_debug_entry_t  *list;
 379   const char                *file;
 380   int                       line;
 381 };
 382 struct tmp_debug_entry_t {
 383   struct tmp_debug_entry_t  *next;
 384   char                      *block;
 385   size_t                    size;
 386 };
 387 void  __gmp_tmp_debug_mark  _PROTO ((const char *, int, struct tmp_debug_t **,
 388                                      struct tmp_debug_t *,
 389                                      const char *, const char *));
 390 void *__gmp_tmp_debug_alloc _PROTO ((const char *, int, int,
 391                                      struct tmp_debug_t **, const char *,
 392                                      size_t)) ATTRIBUTE_MALLOC;
 393 void  __gmp_tmp_debug_free  _PROTO ((const char *, int, int,
 394                                      struct tmp_debug_t **,
 395                                      const char *, const char *));
 396 #define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
 397 #define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
 398 #define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
 399 #define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
 400 #define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
 401 #define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
 402 /* The marker variable is designed to provoke an uninitialized varialble
 403    warning from the compiler if TMP_FREE is used without a TMP_MARK.
 404    __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
 405    these things up too.  */
 406 #define TMP_DECL_NAME(marker, marker_name)                      \
 407   int marker;                                                   \
 408   int __tmp_marker_inscope;                                     \
 409   const char *__tmp_marker_name = marker_name;                  \
 410   struct tmp_debug_t  __tmp_marker_struct;                      \
 411   /* don't demand NULL, just cast a zero */                     \
 412   struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
 413 #define TMP_MARK_NAME(marker, marker_name)                      \
 414   do {                                                          \
 415     marker = 1;                                                 \
 416     __tmp_marker_inscope = 1;                                   \
 417     __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
 418                            &__tmp_marker, &__tmp_marker_struct, \
 419                            __tmp_marker_name, marker_name);     \
 420   } while (0)
 421 #define TMP_SALLOC(n)           TMP_ALLOC(n)
 422 #define TMP_BALLOC(n)           TMP_ALLOC(n)
 423 #define TMP_ALLOC(size)                                                 \
 424   __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
 425                          __tmp_marker_inscope,                          \
 426                          &__tmp_marker, __tmp_marker_name, size)
 427 #define TMP_FREE_NAME(marker, marker_name)                      \
 428   do {                                                          \
 429     __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
 430                            marker, &__tmp_marker,               \
 431                            __tmp_marker_name, marker_name);     \
 432   } while (0)
 433 #endif /* WANT_TMP_DEBUG */
 434
 435
 436 /* Allocating various types. */
 437 #define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
 438 #define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
 439 #define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
 440 #define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
 441 #define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
 442 #define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
 443 #define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
 444 #define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
 445 #define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
 446
 447 /* It's more efficient to allocate one block than two.  This is certainly
 448    true of the malloc methods, but it can even be true of alloca if that
 449    involves copying a chunk of stack (various RISCs), or a call to a stack
 450    bounds check (mingw).  In any case, when debugging keep separate blocks
 451    so a redzoning malloc debugger can protect each individually.  */
 452 #define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
 453   do {                                                  \
 454     if (WANT_TMP_DEBUG)                                 \
 455       {                                                 \
 456         (xp) = TMP_ALLOC_LIMBS (xsize);                 \
 457         (yp) = TMP_ALLOC_LIMBS (ysize);                 \
 458       }                                                 \
 459     else                                                \
 460       {                                                 \
 461         (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
 462         (yp) = (xp) + (xsize);                          \
 463       }                                                 \
 464   } while (0)
 465
 466
 467 /* From gmp.h, nicer names for internal use. */
 468 #define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
 469 #define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
 470 #define LIKELY(cond)                   __GMP_LIKELY(cond)
 471 #define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)
 472
 473 #define ABS(x) ((x) >= 0 ? (x) : -(x))
 474 #undef MIN
 475 #define MIN(l,o) ((l) < (o) ? (l) : (o))
 476 #undef MAX
 477 #define MAX(h,i) ((h) > (i) ? (h) : (i))
 478 #define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
 479
 480 /* Field access macros.  */
 481 #define SIZ(x) ((x)->_mp_size)
 482 #define ABSIZ(x) ABS (SIZ (x))
 483 #define PTR(x) ((x)->_mp_d)
 484 #define LIMBS(x) ((x)->_mp_d)
 485 #define EXP(x) ((x)->_mp_exp)
 486 #define PREC(x) ((x)->_mp_prec)
 487 #define ALLOC(x) ((x)->_mp_alloc)
 488
 489 /* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
 490    then that lowest one bit must have been the only bit set.  n==0 will
 491    return true though, so avoid that.  */
 492 #define POW2_P(n)  (((n) & ((n) - 1)) == 0)
 493
 494
 495 /* The "short" defines are a bit different because shorts are promoted to
 496    ints by ~ or >> etc.
 497
 498    #ifndef's are used since on some systems (HP?) header files other than
 499    limits.h setup these defines.  We could forcibly #undef in that case, but
 500    there seems no need to worry about that.  */
 501
 502 #ifndef ULONG_MAX
 503 #define ULONG_MAX   __GMP_ULONG_MAX
 504 #endif
 505 #ifndef UINT_MAX
 506 #define UINT_MAX    __GMP_UINT_MAX
 507 #endif
 508 #ifndef USHRT_MAX
 509 #define USHRT_MAX   __GMP_USHRT_MAX
 510 #endif
 511 #define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)
 512
 513 /* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
 514    unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
 515    treats the plain decimal values in <limits.h> as signed.  */
 516 #define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
 517 #define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
 518 #define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
 519 #define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
 520
 521 #ifndef LONG_MIN
 522 #define LONG_MIN           ((long) ULONG_HIGHBIT)
 523 #endif
 524 #ifndef LONG_MAX
 525 #define LONG_MAX           (-(LONG_MIN+1))
 526 #endif
 527
 528 #ifndef INT_MIN
 529 #define INT_MIN            ((int) UINT_HIGHBIT)
 530 #endif
 531 #ifndef INT_MAX
 532 #define INT_MAX            (-(INT_MIN+1))
 533 #endif
 534
 535 #ifndef SHRT_MIN
 536 #define SHRT_MIN           ((short) USHRT_HIGHBIT)
 537 #endif
 538 #ifndef SHRT_MAX
 539 #define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
 540 #endif
 541
 542 #if __GMP_MP_SIZE_T_INT
 543 #define MP_SIZE_T_MAX      INT_MAX
 544 #define MP_SIZE_T_MIN      INT_MIN
 545 #else
 546 #define MP_SIZE_T_MAX      LONG_MAX
 547 #define MP_SIZE_T_MIN      LONG_MIN
 548 #endif
 549
 550 /* mp_exp_t is the same as mp_size_t */
 551 #define MP_EXP_T_MAX   MP_SIZE_T_MAX
 552 #define MP_EXP_T_MIN   MP_SIZE_T_MIN
 553
 554 #define LONG_HIGHBIT       LONG_MIN
 555 #define INT_HIGHBIT        INT_MIN
 556 #define SHRT_HIGHBIT       SHRT_MIN
 557
 558
 559 #define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
 560
 561 #if GMP_NAIL_BITS == 0
 562 #define GMP_NAIL_LOWBIT   CNST_LIMB(0)
 563 #else
 564 #define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
 565 #endif
 566
 567 #if GMP_NAIL_BITS != 0
 568 /* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
 569    code that has not yet been qualified.  */
 570
 571 #undef DIV_SB_PREINV_THRESHOLD
 572 #undef DIV_DC_THRESHOLD
 573 #undef POWM_THRESHOLD
 574 #define DIV_SB_PREINV_THRESHOLD           MP_SIZE_T_MAX
 575 #define DIV_DC_THRESHOLD                 50
 576 #define POWM_THRESHOLD                    0
 577
 578 #undef GCD_ACCEL_THRESHOLD
 579 #define GCD_ACCEL_THRESHOLD               3
 580
 581 #undef DIVREM_1_NORM_THRESHOLD
 582 #undef DIVREM_1_UNNORM_THRESHOLD
 583 #undef MOD_1_NORM_THRESHOLD
 584 #undef MOD_1_UNNORM_THRESHOLD
 585 #undef USE_PREINV_DIVREM_1
 586 #undef USE_PREINV_MOD_1
 587 #undef DIVREM_2_THRESHOLD
 588 #undef DIVEXACT_1_THRESHOLD
 589 #undef MODEXACT_1_ODD_THRESHOLD
 590 #define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
 591 #define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
 592 #define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
 593 #define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
 594 #define USE_PREINV_DIVREM_1               0  /* no preinv */
 595 #define USE_PREINV_MOD_1                  0  /* no preinv */
 596 #define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */
 597
 598 #undef GET_STR_DC_THRESHOLD
 599 #undef GET_STR_PRECOMPUTE_THRESHOLD
 600 #undef SET_STR_THRESHOLD
 601 #define GET_STR_DC_THRESHOLD             22
 602 #define GET_STR_PRECOMPUTE_THRESHOLD     42
 603 #define SET_STR_THRESHOLD              3259
 604
 605 /* mpn/generic/mul_fft.c is not nails-capable. */
 606 #undef  MUL_FFT_THRESHOLD
 607 #undef  SQR_FFT_THRESHOLD
 608 #define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
 609 #define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
 610 #endif
 611
 612 /* Swap macros. */
 613
 614 #define MP_LIMB_T_SWAP(x, y)                    \
 615   do {                                          \
 616     mp_limb_t __mp_limb_t_swap__tmp = (x);      \
 617     (x) = (y);                                  \
 618     (y) = __mp_limb_t_swap__tmp;                \
 619   } while (0)
 620 #define MP_SIZE_T_SWAP(x, y)                    \
 621   do {                                          \
 622     mp_size_t __mp_size_t_swap__tmp = (x);      \
 623     (x) = (y);                                  \
 624     (y) = __mp_size_t_swap__tmp;                \
 625   } while (0)
 626
 627 #define MP_PTR_SWAP(x, y)               \
 628   do {                                  \
 629     mp_ptr __mp_ptr_swap__tmp = (x);    \
 630     (x) = (y);                          \
 631     (y) = __mp_ptr_swap__tmp;           \
 632   } while (0)
 633 #define MP_SRCPTR_SWAP(x, y)                    \
 634   do {                                          \
 635     mp_srcptr __mp_srcptr_swap__tmp = (x);      \
 636     (x) = (y);                                  \
 637     (y) = __mp_srcptr_swap__tmp;                \
 638   } while (0)
 639
 640 #define MPN_PTR_SWAP(xp,xs, yp,ys)      \
 641   do {                                  \
 642     MP_PTR_SWAP (xp, yp);               \
 643     MP_SIZE_T_SWAP (xs, ys);            \
 644   } while(0)
 645 #define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
 646   do {                                  \
 647     MP_SRCPTR_SWAP (xp, yp);            \
 648     MP_SIZE_T_SWAP (xs, ys);            \
 649   } while(0)
 650
 651 #define MPZ_PTR_SWAP(x, y)              \
 652   do {                                  \
 653     mpz_ptr __mpz_ptr_swap__tmp = (x);  \
 654     (x) = (y);                          \
 655     (y) = __mpz_ptr_swap__tmp;          \
 656   } while (0)
 657 #define MPZ_SRCPTR_SWAP(x, y)                   \
 658   do {                                          \
 659     mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
 660     (x) = (y);                                  \
 661     (y) = __mpz_srcptr_swap__tmp;               \
 662   } while (0)
 663
 664
 665 /* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
 666    but current gcc (3.0) doesn't seem to support that.  */
 667 __GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
 668 __GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
 669 __GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
 670
 671 void *__gmp_default_allocate _PROTO ((size_t));
 672 void *__gmp_default_reallocate _PROTO ((void *, size_t, size_t));
 673 void __gmp_default_free _PROTO ((void *, size_t));
 674
 675 #define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
 676   ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
 677 #define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
 678
 679 #define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
 680   ((type *) (*__gmp_reallocate_func)                            \
 681    (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
 682 #define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
 683   __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
 684
 685 #define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
 686 #define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
 687
 688 #define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
 689   do {                                                          \
 690     if ((oldsize) != (newsize))                                 \
 691       (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
 692   } while (0)
 693
 694 #define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
 695   do {                                                                  \
 696     if ((oldsize) != (newsize))                                         \
 697       (ptr) = (type *) (*__gmp_reallocate_func)                         \
 698         (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
 699   } while (0)
 700
 701
 702 /* Dummy for non-gcc, code involving it will go dead. */
 703 #if ! defined (__GNUC__) || __GNUC__ < 2
 704 #define __builtin_constant_p(x)   0
 705 #endif
 706
 707
 708 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
 709    stack usage is compatible.  __attribute__ ((regparm (N))) helps by
 710    putting leading parameters in registers, avoiding extra stack.
 711
 712    regparm cannot be used with calls going through the PLT, because the
 713    binding code there may clobber the registers (%eax, %edx, %ecx) used for
 714    the regparm parameters.  Calls to local (ie. static) functions could
 715    still use this, if we cared to differentiate locals and globals.
 716
 717    On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
 718    -p or -pg profiling, since that version of gcc doesn't realize the
 719    .mcount calls will clobber the parameter registers.  Other systems are
 720    ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
 721    bother to try to detect this.  regparm is only an optimization so we just
 722    disable it when profiling (profiling being a slowdown anyway).  */
 723
 724 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
 725   && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
 726 #define USE_LEADING_REGPARM 1
 727 #else
 728 #define USE_LEADING_REGPARM 0
 729 #endif
 730
 731 /* Macros for altering parameter order according to regparm usage. */
 732 #if USE_LEADING_REGPARM
 733 #define REGPARM_2_1(a,b,x)    x,a,b
 734 #define REGPARM_3_1(a,b,c,x)  x,a,b,c
 735 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
 736 #else
 737 #define REGPARM_2_1(a,b,x)    a,b,x
 738 #define REGPARM_3_1(a,b,c,x)  a,b,c,x
 739 #define REGPARM_ATTR(n)
 740 #endif
 741
 742
 743 /* ASM_L gives a local label for a gcc asm block, for use when temporary
 744    local labels like "1:" might not be available, which is the case for
 745    instance on the x86s (the SCO assembler doesn't support them).
 746
 747    The label generated is made unique by including "%=" which is a unique
 748    number for each insn.  This ensures the same name can be used in multiple
 749    asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
 750    allowed there's no need for a label to be usable outside a single
 751    block.  */
 752
 753 #define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
 754
 755
 756 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
 757 #if 0
 758 /* FIXME: Check that these actually improve things.
 759    FIXME: Need a cld after each std.
 760    FIXME: Can't have inputs in clobbered registers, must describe them as
 761    dummy outputs, and add volatile. */
 762 #define MPN_COPY_INCR(DST, SRC, N)                                      \
 763   __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
 764            "D" (DST), "S" (SRC), "c" (N) :                              \
 765            "cx", "di", "si", "memory")
 766 #define MPN_COPY_DECR(DST, SRC, N)                                      \
 767   __asm__ ("std\n\trep\n\tmovsl" : :                                    \
 768            "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
 769            "cx", "di", "si", "memory")
 770 #endif
 771 #endif
 772
 773
 774 void __gmpz_aorsmul_1 _PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub))) REGPARM_ATTR(1);
 775 #define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
 776
 777 #define mpz_n_pow_ui __gmpz_n_pow_ui
 778 void    mpz_n_pow_ui _PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
 779
 780
 781 #define mpn_add_nc __MPN(add_nc)
 782 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 783
 784 #define mpn_addmul_1c __MPN(addmul_1c)
 785 __GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 786
 787 #define mpn_addmul_2 __MPN(addmul_2)
 788 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 789
 790 #define mpn_addmul_3 __MPN(addmul_3)
 791 __GMP_DECLSPEC mp_limb_t mpn_addmul_3 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 792
 793 #define mpn_addmul_4 __MPN(addmul_4)
 794 __GMP_DECLSPEC mp_limb_t mpn_addmul_4 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 795
 796 #define mpn_addmul_5 __MPN(addmul_5)
 797 __GMP_DECLSPEC mp_limb_t mpn_addmul_5 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 798
 799 #define mpn_addmul_6 __MPN(addmul_6)
 800 __GMP_DECLSPEC mp_limb_t mpn_addmul_6 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 801
 802 #define mpn_addmul_7 __MPN(addmul_7)
 803 __GMP_DECLSPEC mp_limb_t mpn_addmul_7 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 804
 805 #define mpn_addmul_8 __MPN(addmul_8)
 806 __GMP_DECLSPEC mp_limb_t mpn_addmul_8 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 807
 808 /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
 809    returns the carry out (0, 1 or 2).  */
 810 #define mpn_addlsh1_n __MPN(addlsh1_n)
 811 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 812
 813 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
 814    returns the borrow out (0, 1 or 2).  */
 815 #define mpn_sublsh1_n __MPN(sublsh1_n)
 816 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 817
 818 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
 819    and returns the bit rshifted out (0 or 1).  */
 820 #define mpn_rsh1add_n __MPN(rsh1add_n)
 821 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 822
 823 /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
 824    and returns the bit rshifted out (0 or 1).  If there's a borrow from the
 825    subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
 826    complement negative.  */
 827 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
 828 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 829
 830 #define mpn_addsub_n __MPN(addsub_n)
 831 __GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 832
 833 #define mpn_addsub_nc __MPN(addsub_nc)
 834 __GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 835
 836 #define mpn_divrem_1c __MPN(divrem_1c)
 837 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 838
 839 #define mpn_dump __MPN(dump)
 840 __GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
 841
 842 #define mpn_fib2_ui __MPN(fib2_ui)
 843 mp_size_t mpn_fib2_ui _PROTO ((mp_ptr, mp_ptr, unsigned long));
 844
 845 /* Remap names of internal mpn functions.  */
 846 #define __clz_tab               __MPN(clz_tab)
 847 #define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)
 848
 849 #define mpn_gcd_finda   __MPN(gcd_finda)
 850 mp_limb_t mpn_gcd_finda _PROTO((const mp_limb_t cp[2])) __GMP_ATTRIBUTE_PURE;
 851
 852 #define mpn_jacobi_base __MPN(jacobi_base)
 853 int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)) ATTRIBUTE_CONST;
 854
 855 #define mpn_mod_1c __MPN(mod_1c)
 856 __GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
 857
 858 #define mpn_mul_1c __MPN(mul_1c)
 859 __GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 860
 861 #define mpn_mul_2 __MPN(mul_2)
 862 mp_limb_t mpn_mul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 863
 864 #ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
 865 #define mpn_mul_basecase __MPN(mul_basecase)
 866 __GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
 867 #endif
 868
 869 #define mpn_mullow_n __MPN(mullow_n)
 870 __GMP_DECLSPEC void mpn_mullow_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 871
 872 #define mpn_mullow_basecase __MPN(mullow_basecase)
 873 __GMP_DECLSPEC void mpn_mullow_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
 874
 875 #define mpn_sqr_n __MPN(sqr_n)
 876 __GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
 877
 878 #ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
 879 #define mpn_sqr_basecase __MPN(sqr_basecase)
 880 __GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
 881 #endif
 882
 883 #define mpn_sub_nc __MPN(sub_nc)
 884 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
 885
 886 #define mpn_submul_1c __MPN(submul_1c)
 887 __GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
 888
 889 #define mpn_invert_2exp __MPN(invert_2exp)
 890 __GMP_DECLSPEC void mpn_invert_2exp __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
 891
 892 #define mpn_redc_1 __MPN(redc_1)
 893 __GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);)
 894
 895 #define mpn_redc_2 __MPN(redc_2)
 896 __GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
 897
 898
 899 typedef __gmp_randstate_struct *gmp_randstate_ptr;
 900 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
 901
 902 /* Pseudo-random number generator function pointers structure.  */
 903 typedef struct {
 904   void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t rstate, mpz_srcptr seed));
 905   void (*randget_fn) __GMP_PROTO ((gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits));
 906   void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t rstate));
 907   void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
 908 } gmp_randfnptr_t;
 909
 910 /* Macro to obtain a void pointer to the function pointers structure.  */
 911 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
 912
 913 /* Macro to obtain a pointer to the generator's state.
 914    When used as a lvalue the rvalue needs to be cast to mp_ptr.  */
 915 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
 916
 917 /* Write a given number of random bits to rp.  */
 918 #define _gmp_rand(rp, state, bits)                              \
 919   do {                                                          \
 920     gmp_randstate_ptr  __rstate = (state);                      \
 921     (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
 922        (__rstate, rp, bits);                                    \
 923   } while (0)
 924
 925 __GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
 926
 927
 928 /* __gmp_rands is the global state for the old-style random functions, and
 929    is also used in the test programs (hence the __GMP_DECLSPEC).
 930
 931    There's no seeding here, so mpz_random etc will generate the same
 932    sequence every time.  This is not unlike the C library random functions
 933    if you don't seed them, so perhaps it's acceptable.  Digging up a seed
 934    from /dev/random or the like would work on many systems, but might
 935    encourage a false confidence, since it'd be pretty much impossible to do
 936    something that would work reliably everywhere.  In any case the new style
 937    functions are recommended to applications which care about randomness, so
 938    the old functions aren't too important.  */
 939
 940 __GMP_DECLSPEC extern char             __gmp_rands_initialized;
 941 __GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;
 942
 943 #define RANDS                                       \
 944   ((__gmp_rands_initialized ? 0                     \
 945     : (__gmp_rands_initialized = 1,                 \
 946        __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
 947    __gmp_rands)
 948
 949 /* this is used by the test programs, to free memory */
 950 #define RANDS_CLEAR()                   \
 951   do {                                  \
 952     if (__gmp_rands_initialized)        \
 953       {                                 \
 954         __gmp_rands_initialized = 0;    \
 955         gmp_randclear (__gmp_rands);    \
 956       }                                 \
 957   } while (0)
 958
 959
 960 /* kara uses n+1 limbs of temporary space and then recurses with the balance,
 961    so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ...  This can be solved to
 962    2n + o(n).  Since n is very limited, o(n) in practice could be around 15.
 963    For now, assume n is arbitrarily large.  */
 964 #define MPN_KARA_MUL_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
 965 #define MPN_KARA_SQR_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
 966
 967 /* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is
 968    unavailable, but just 2n + o(n) if mpn_sublsh1_n is available.  It is hard
 969    to pin down the value of o(n), since it is a complex function of
 970    MUL_TOOM3_THRESHOLD and n.  Normally toom3 is used between kara and fft; in
 971    that case o(n) will be really limited.  If toom3 is used for arbitrarily
 972    large operands, o(n) will be larger.  These definitions handle operands of
 973    up to 8956264246117233 limbs.  A single multiplication using toom3 on the
 974    fastest hardware currently (2003) would need 100 million years, which
 975    suggests that these limits are acceptable.  */
 976 #if WANT_FFT
 977 #if HAVE_NATIVE_mpn_sublsh1_n
 978 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 63)
 979 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 63)
 980 #else
 981 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
 982 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
 983 #endif
 984 #else /* WANT_FFT */
 985 #if HAVE_NATIVE_mpn_sublsh1_n
 986 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 255)
 987 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 255)
 988 #else
 989 #define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
 990 #define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
 991 #endif
 992 #define MPN_TOOM3_MAX_N 285405
 993 #endif /* WANT_FFT */
 994
 995 /* need 2 so that n2>=1 */
 996 #define MPN_KARA_MUL_N_MINSIZE    2
 997 #define MPN_KARA_SQR_N_MINSIZE    2
 998
 999 /* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
1000 #define MPN_TOOM3_MUL_N_MINSIZE   17
1001 #define MPN_TOOM3_SQR_N_MINSIZE   17
1002
1003 #define mpn_sqr_diagonal __MPN(sqr_diagonal)
1004 void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1005
1006 #define mpn_kara_mul_n  __MPN(kara_mul_n)
1007 void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
1008
1009 #define mpn_kara_sqr_n  __MPN(kara_sqr_n)
1010 void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1011
1012 #define mpn_toom3_mul_n  __MPN(toom3_mul_n)
1013 void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
1014
1015 #define mpn_toom3_sqr_n  __MPN(toom3_sqr_n)
1016 void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
1017
1018
1019 #define mpn_fft_best_k __MPN(fft_best_k)
1020 int     mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST;
1021
1022 #define mpn_mul_fft  __MPN(mul_fft)
1023 int mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
1024                           mp_srcptr n, mp_size_t nl,
1025                           mp_srcptr m, mp_size_t ml,
1026                           int k));
1027
1028 #define mpn_mul_fft_full  __MPN(mul_fft_full)
1029 void mpn_mul_fft_full _PROTO ((mp_ptr op,
1030                                mp_srcptr n, mp_size_t nl,
1031                                mp_srcptr m, mp_size_t ml));
1032
1033 #define   mpn_fft_next_size __MPN(fft_next_size)
1034 mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)) ATTRIBUTE_CONST;
1035
1036 #define mpn_sb_divrem_mn  __MPN(sb_divrem_mn)
1037 mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t,
1038                                     mp_srcptr, mp_size_t));
1039
1040 #define mpn_dc_divrem_n  __MPN(dc_divrem_n)
1041 mp_limb_t mpn_dc_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
1042
1043 /* #define mpn_tdiv_q  __MPN(tdiv_q) */
1044 /* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
1045
1046 #define mpz_divexact_gcd  __gmpz_divexact_gcd
1047 void mpz_divexact_gcd _PROTO ((mpz_ptr q, mpz_srcptr a, mpz_srcptr d));
1048
1049 #define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1050 #ifdef _GMP_H_HAVE_FILE
1051 size_t mpz_inp_str_nowhite _PROTO ((mpz_ptr x, FILE *stream, int base, int c, size_t nread));
1052 #endif
1053
1054 #define mpn_divisible_p __MPN(divisible_p)
1055 int     mpn_divisible_p _PROTO ((mp_srcptr ap, mp_size_t asize,
1056                                  mp_srcptr dp, mp_size_t dsize)) __GMP_ATTRIBUTE_PURE;
1057
1058 #define mpn_rootrem __MPN(rootrem)
1059 mp_size_t mpn_rootrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
1060
1061
1062 #if defined (_CRAY)
1063 #define MPN_COPY_INCR(dst, src, n)                                      \
1064   do {                                                                  \
1065     int __i;            /* Faster on some Crays with plain int */       \
1066     _Pragma ("_CRI ivdep");                                             \
1067     for (__i = 0; __i < (n); __i++)                                     \
1068       (dst)[__i] = (src)[__i];                                          \
1069   } while (0)
1070 #endif
1071
1072 /* used by test programs, hence __GMP_DECLSPEC */
1073 #ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
1074 #define mpn_copyi __MPN(copyi)
1075 __GMP_DECLSPEC void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1076 #endif
1077
1078 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1079 #define MPN_COPY_INCR(dst, src, size)                   \
1080   do {                                                  \
1081     ASSERT ((size) >= 0);                               \
1082     ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
1083     mpn_copyi (dst, src, size);                         \
1084   } while (0)
1085 #endif
1086
1087 /* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
1088 #if ! defined (MPN_COPY_INCR)
1089 #define MPN_COPY_INCR(dst, src, n)                      \
1090   do {                                                  \
1091     ASSERT ((n) >= 0);                                  \
1092     ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
1093     if ((n) != 0)                                       \
1094       {                                                 \
1095         mp_size_t __n = (n) - 1;                        \
1096         mp_ptr __dst = (dst);                           \
1097         mp_srcptr __src = (src);                        \
1098         mp_limb_t __x;                                  \
1099         __x = *__src++;                                 \
1100         if (__n != 0)                                   \
1101           {                                             \
1102             do                                          \
1103               {                                         \
1104                 *__dst++ = __x;                         \
1105                 __x = *__src++;                         \
1106               }                                         \
1107             while (--__n);                              \
1108           }                                             \
1109         *__dst++ = __x;                                 \
1110       }                                                 \
1111   } while (0)
1112 #endif
1113
1114
1115 #if defined (_CRAY)
1116 #define MPN_COPY_DECR(dst, src, n)                                      \
1117   do {                                                                  \
1118     int __i;            /* Faster on some Crays with plain int */       \
1119     _Pragma ("_CRI ivdep");                                             \
1120     for (__i = (n) - 1; __i >= 0; __i--)                                \
1121       (dst)[__i] = (src)[__i];                                          \
1122   } while (0)
1123 #endif
1124
1125 /* used by test programs, hence __GMP_DECLSPEC */
1126 #ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
1127 #define mpn_copyd __MPN(copyd)
1128 __GMP_DECLSPEC void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1129 #endif
1130
1131 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1132 #define MPN_COPY_DECR(dst, src, size)                   \
1133   do {                                                  \
1134     ASSERT ((size) >= 0);                               \
1135     ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
1136     mpn_copyd (dst, src, size);                         \
1137   } while (0)
1138 #endif
1139
1140 /* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
1141 #if ! defined (MPN_COPY_DECR)
1142 #define MPN_COPY_DECR(dst, src, n)                      \
1143   do {                                                  \
1144     ASSERT ((n) >= 0);                                  \
1145     ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
1146     if ((n) != 0)                                       \
1147       {                                                 \
1148         mp_size_t __n = (n) - 1;                        \
1149         mp_ptr __dst = (dst) + __n;                     \
1150         mp_srcptr __src = (src) + __n;                  \
1151         mp_limb_t __x;                                  \
1152         __x = *__src--;                                 \
1153         if (__n != 0)                                   \
1154           {                                             \
1155             do                                          \
1156               {                                         \
1157                 *__dst-- = __x;                         \
1158                 __x = *__src--;                         \
1159               }                                         \
1160             while (--__n);                              \
1161           }                                             \
1162         *__dst-- = __x;                                 \
1163       }                                                 \
1164   } while (0)
1165 #endif
1166
1167
1168 #ifndef MPN_COPY
1169 #define MPN_COPY(d,s,n)                         \
1170   do {                                          \
1171     ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
1172     MPN_COPY_INCR (d, s, n);                    \
1173   } while (0)
1174 #endif
1175
1176
1177 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1178 #define MPN_REVERSE(dst, src, size)                     \
1179   do {                                                  \
1180     mp_ptr     __dst = (dst);                           \
1181     mp_size_t  __size = (size);                         \
1182     mp_srcptr  __src = (src) + __size - 1;              \
1183     mp_size_t  __i;                                     \
1184     ASSERT ((size) >= 0);                               \
1185     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
1186     CRAY_Pragma ("_CRI ivdep");                         \
1187     for (__i = 0; __i < __size; __i++)                  \
1188       {                                                 \
1189         *__dst = *__src;                                \
1190         __dst++;                                        \
1191         __src--;                                        \
1192       }                                                 \
1193   } while (0)
1194
1195
1196 /* Zero n limbs at dst.
1197
1198    For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
1199    ppc630 for instance this is optimal since it can sustain only 1 store per
1200    cycle.
1201
1202    gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1203    "for" loop in the generic code below can become stu/bdnz.  The do/while
1204    here helps it get to that.  The same caveat about plain -mpowerpc64 mode
1205    applies here as to __GMPN_COPY_INCR in gmp.h.
1206
1207    xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1208    this loop too.
1209
1210    Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1211    at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
1212    trouble than it's worth to do the same, though perhaps a call to memset
1213    would be good when on a GNU system.  */
1214
1215 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1216 #define MPN_ZERO(dst, n)                        \
1217   do {                                          \
1218     ASSERT ((n) >= 0);                          \
1219     if ((n) != 0)                               \
1220       {                                         \
1221         mp_ptr __dst = (dst) - 1;               \
1222         mp_size_t __n = (n);                    \
1223         do                                      \
1224           *++__dst = 0;                         \
1225         while (--__n);                          \
1226       }                                         \
1227   } while (0)
1228 #endif
1229
1230 #ifndef MPN_ZERO
1231 #define MPN_ZERO(dst, n)                        \
1232   do {                                          \
1233     ASSERT ((n) >= 0);                          \
1234     if ((n) != 0)                               \
1235       {                                         \
1236         mp_ptr __dst = (dst);                   \
1237         mp_size_t __n = (n);                    \
1238         do                                      \
1239           *__dst++ = 0;                         \
1240         while (--__n);                          \
1241       }                                         \
1242   } while (0)
1243 #endif
1244
1245
1246 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1247    start up and would need to strip a lot of zeros before it'd be faster
1248    than a simple cmpl loop.  Here are some times in cycles for
1249    std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1250    low zeros).
1251
1252                 std   cld
1253            P5    18    16
1254            P6    46    38
1255            K6    36    13
1256            K7    21    20
1257 */
1258 #ifndef MPN_NORMALIZE
1259 #define MPN_NORMALIZE(DST, NLIMBS) \
1260   do {                                                                  \
1261     while ((NLIMBS) > 0)                                                \
1262       {                                                                 \
1263         if ((DST)[(NLIMBS) - 1] != 0)                                   \
1264           break;                                                        \
1265         (NLIMBS)--;                                                     \
1266       }                                                                 \
1267   } while (0)
1268 #endif
1269 #ifndef MPN_NORMALIZE_NOT_ZERO
1270 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
1271   do {                                          \
1272     ASSERT ((NLIMBS) >= 1);                     \
1273     while (1)                                   \
1274       {                                         \
1275         if ((DST)[(NLIMBS) - 1] != 0)           \
1276           break;                                \
1277         (NLIMBS)--;                             \
1278       }                                         \
1279   } while (0)
1280 #endif
1281
1282 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1283    and decrementing size.  low should be ptr[0], and will be the new ptr[0]
1284    on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
1285    somewhere a non-zero limb.  */
1286 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
1287   do {                                                  \
1288     ASSERT ((size) >= 1);                               \
1289     ASSERT ((low) == (ptr)[0]);                         \
1290                                                         \
1291     while ((low) == 0)                                  \
1292       {                                                 \
1293         (size)--;                                       \
1294         ASSERT ((size) >= 1);                           \
1295         (ptr)++;                                        \
1296         (low) = *(ptr);                                 \
1297       }                                                 \
1298   } while (0)
1299
1300 /* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
1301    temporary variable; it will be automatically cleared out at function
1302    return.  We use __x here to make it possible to accept both mpz_ptr and
1303    mpz_t arguments.  */
1304 #define MPZ_TMP_INIT(X, NLIMBS)                                         \
1305   do {                                                                  \
1306     mpz_ptr __x = (X);                                                  \
1307     ASSERT ((NLIMBS) >= 1);                                             \
1308     __x->_mp_alloc = (NLIMBS);                                          \
1309     __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
1310   } while (0)
1311
1312 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs.  */
1313 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
1314                           ? (mp_ptr) _mpz_realloc(z,n)  \
1315                           : PTR(z))
1316
1317 #define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
1318
1319
1320 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1321    f1p.
1322
1323    From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1324    nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
1325    number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1326
1327    The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1328    without good floating point.  There's +2 for rounding up, and a further
1329    +2 since at the last step x limbs are doubled into a 2x+1 limb region
1330    whereas the actual F[2k] value might be only 2x-1 limbs.
1331
1332    Note that a division is done first, since on a 32-bit system it's at
1333    least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
1334    about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1335    whatever a multiply of two 190Mbyte numbers takes.)
1336
1337    Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1338    worked into the multiplier.  */
1339
1340 #define MPN_FIB2_SIZE(n) \
1341   ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
1342
1343
1344 /* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
1345    -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1346
1347    FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1348    F[n] + 2*F[n-1] fits in a limb.  */
1349
1350 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1351 #define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
1352
1353
1354 /* For a threshold between algorithms A and B, size>=thresh is where B
1355    should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
1356    value 0 means only ever use B.  The tests for these special values will
1357    be compile-time constants, so the compiler should be able to eliminate
1358    the code for the unwanted algorithm.  */
1359
1360 #define ABOVE_THRESHOLD(size,thresh)    \
1361   ((thresh) == 0                        \
1362    || ((thresh) != MP_SIZE_T_MAX        \
1363        && (size) >= (thresh)))
1364 #define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
1365
1366 /* Usage: int  use_foo = BELOW_THRESHOLD (size, FOO_THRESHOLD);
1367           ...
1368           if (CACHED_BELOW_THRESHOLD (use_foo, size, FOO_THRESHOLD))
1369
1370    When "use_foo" is a constant (thresh is 0 or MP_SIZE_T), gcc prior to
1371    version 3.3 doesn't optimize away a test "if (use_foo)" when within a
1372    loop.  CACHED_BELOW_THRESHOLD helps it do so.  */
1373
1374 #define CACHED_ABOVE_THRESHOLD(cache, thresh)           \
1375   ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
1376    ? ABOVE_THRESHOLD (0, thresh)                        \
1377    : (cache))
1378 #define CACHED_BELOW_THRESHOLD(cache, thresh)           \
1379   ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
1380    ? BELOW_THRESHOLD (0, thresh)                        \
1381    : (cache))
1382
1383
1384 /* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
1385    value which is good on most machines.  */
1386 #ifndef MUL_KARATSUBA_THRESHOLD
1387 #define MUL_KARATSUBA_THRESHOLD 32
1388 #endif
1389
1390 /* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
1391    value which is good on most machines.  */
1392 #ifndef MUL_TOOM3_THRESHOLD
1393 #define MUL_TOOM3_THRESHOLD 128
1394 #endif
1395
1396 /* MUL_KARATSUBA_THRESHOLD_LIMIT is the maximum for MUL_KARATSUBA_THRESHOLD.
1397    In a normal build MUL_KARATSUBA_THRESHOLD is a constant and we use that.
1398    In a fat binary or tune program build MUL_KARATSUBA_THRESHOLD is a
1399    variable and a separate hard limit will have been defined.  Similarly for
1400    TOOM3.  */
1401 #ifndef MUL_KARATSUBA_THRESHOLD_LIMIT
1402 #define MUL_KARATSUBA_THRESHOLD_LIMIT  MUL_KARATSUBA_THRESHOLD
1403 #endif
1404 #ifndef MUL_TOOM3_THRESHOLD_LIMIT
1405 #define MUL_TOOM3_THRESHOLD_LIMIT  MUL_TOOM3_THRESHOLD
1406 #endif
1407 #ifndef MULLOW_BASECASE_THRESHOLD_LIMIT
1408 #define MULLOW_BASECASE_THRESHOLD_LIMIT  MULLOW_BASECASE_THRESHOLD
1409 #endif
1410
1411 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
1412    mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
1413    always.  (Note that we certainly always want it if there's a native
1414    assembler mpn_sqr_basecase.)
1415
1416    If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
1417    before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
1418    karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
1419    more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
1420    which mpn_sqr_basecase should be used, and that may be never.  */
1421
1422 #ifndef SQR_BASECASE_THRESHOLD
1423 #define SQR_BASECASE_THRESHOLD 0
1424 #endif
1425
1426 #ifndef SQR_KARATSUBA_THRESHOLD
1427 #define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
1428 #endif
1429
1430 #ifndef SQR_TOOM3_THRESHOLD
1431 #define SQR_TOOM3_THRESHOLD 128
1432 #endif
1433
1434 /* See comments above about MUL_TOOM3_THRESHOLD_LIMIT.  */
1435 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
1436 #define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
1437 #endif
1438
1439 /* First k to use for an FFT modF multiply.  A modF FFT is an order
1440    log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
1441    whereas k=4 is 1.33 which is faster than toom3 at 1.485.    */
1442 #define FFT_FIRST_K  4
1443
1444 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1445 #ifndef MUL_FFT_MODF_THRESHOLD
1446 #define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
1447 #endif
1448 #ifndef SQR_FFT_MODF_THRESHOLD
1449 #define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
1450 #endif
1451
1452 /* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
1453    will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1454    NxN->2N multiply and not recursing into itself is an order
1455    log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
1456    is the first better than toom3.  */
1457 #ifndef MUL_FFT_THRESHOLD
1458 #define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
1459 #endif
1460 #ifndef SQR_FFT_THRESHOLD
1461 #define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
1462 #endif
1463
1464 /* Table of thresholds for successive modF FFT "k"s.  The first entry is
1465    where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1466    etc.  See mpn_fft_best_k(). */
1467 #ifndef MUL_FFT_TABLE
1468 #define MUL_FFT_TABLE                           \
1469   { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
1470     MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
1471     MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
1472     MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
1473     MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
1474     MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
1475     0 }
1476 #endif
1477 #ifndef SQR_FFT_TABLE
1478 #define SQR_FFT_TABLE                           \
1479   { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
1480     SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
1481     SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
1482     SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
1483     SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
1484     SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
1485     0 }
1486 #endif
1487
1488 #ifndef FFT_TABLE_ATTRS
1489 #define FFT_TABLE_ATTRS   static const
1490 #endif
1491
1492 #define MPN_FFT_TABLE_SIZE  16
1493
1494
1495 /* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
1496    div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
1497    i.e. n/2 >= MUL_KARATSUBA_THRESHOLD
1498
1499    Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
1500    for 3 as an average.  */
1501
1502 #ifndef DIV_DC_THRESHOLD
1503 #define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
1504 #endif
1505
1506
1507 /* Return non-zero if xp,xsize and yp,ysize overlap.
1508    If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1509    overlap.  If both these are false, there's an overlap. */
1510 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1511   ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1512 #define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
1513   (   (char *) (xp) + (xsize) > (char *) (yp)   \
1514    && (char *) (yp) + (ysize) > (char *) (xp))
1515
1516 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
1517    overlapping.  Return zero if they're partially overlapping. */
1518 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size)    \
1519   MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
1520 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)           \
1521   ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
1522
1523 /* Return non-zero if dst,dsize and src,ssize are either identical or
1524    overlapping in a way suitable for an incrementing/decrementing algorithm.
1525    Return zero if they're partially overlapping in an unsuitable fashion. */
1526 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)             \
1527   ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1528 #define MPN_SAME_OR_INCR_P(dst, src, size)      \
1529   MPN_SAME_OR_INCR2_P(dst, size, src, size)
1530 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)             \
1531   ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1532 #define MPN_SAME_OR_DECR_P(dst, src, size)      \
1533   MPN_SAME_OR_DECR2_P(dst, size, src, size)
1534
1535
1536 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1537    ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1538    does it always.  Generally assertions are meant for development, but
1539    might help when looking for a problem later too.
1540
1541    Note that strings shouldn't be used within the ASSERT expression,
1542    eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
1543    used in the !HAVE_STRINGIZE case (ie. K&R).  */
1544
1545 #ifdef __LINE__
1546 #define ASSERT_LINE  __LINE__
1547 #else
1548 #define ASSERT_LINE  -1
1549 #endif
1550
1551 #ifdef __FILE__
1552 #define ASSERT_FILE  __FILE__
1553 #else
1554 #define ASSERT_FILE  ""
1555 #endif
1556
1557 void __gmp_assert_header _PROTO ((const char *filename, int linenum));
1558 __GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;
1559
1560 #if HAVE_STRINGIZE
1561 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
1562 #else
1563 #define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
1564 #endif
1565
1566 #define ASSERT_ALWAYS(expr)     \
1567   do {                          \
1568     if (!(expr))                \
1569       ASSERT_FAIL (expr);       \
1570   } while (0)
1571
1572 #if WANT_ASSERT
1573 #define ASSERT(expr)   ASSERT_ALWAYS (expr)
1574 #else
1575 #define ASSERT(expr)   do {} while (0)
1576 #endif
1577
1578
1579 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
1580    that it's zero.  In both cases if assertion checking is disabled the
1581    expression is still evaluated.  These macros are meant for use with
1582    routines like mpn_add_n() where the return value represents a carry or
1583    whatever that should or shouldn't occur in some context.  For example,
1584    ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
1585 #if WANT_ASSERT
1586 #define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
1587 #define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
1588 #else
1589 #define ASSERT_CARRY(expr)     (expr)
1590 #define ASSERT_NOCARRY(expr)   (expr)
1591 #endif
1592
1593
1594 /* ASSERT_CODE includes code when assertion checking is wanted.  This is the
1595    same as writing "#if WANT_ASSERT", but more compact.  */
1596 #if WANT_ASSERT
1597 #define ASSERT_CODE(expr)  expr
1598 #else
1599 #define ASSERT_CODE(expr)
1600 #endif
1601
1602
1603 /* Test that an mpq_t is in fully canonical form.  This can be used as
1604    protection on routines like mpq_equal which give wrong results on
1605    non-canonical inputs.  */
1606 #if WANT_ASSERT
1607 #define ASSERT_MPQ_CANONICAL(q)                         \
1608   do {                                                  \
1609     ASSERT (q->_mp_den._mp_size > 0);                   \
1610     if (q->_mp_num._mp_size == 0)                       \
1611       {                                                 \
1612         /* zero should be 0/1 */                        \
1613         ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
1614       }                                                 \
1615     else                                                \
1616       {                                                 \
1617         /* no common factors */                         \
1618         mpz_t  __g;                                     \
1619         mpz_init (__g);                                 \
1620         mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
1621         ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
1622         mpz_clear (__g);                                \
1623       }                                                 \
1624   } while (0)
1625 #else
1626 #define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
1627 #endif
1628
1629 /* Check that the nail parts are zero. */
1630 #define ASSERT_ALWAYS_LIMB(limb)                \
1631   do {                                          \
1632     mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
1633     ASSERT_ALWAYS (__nail == 0);                \
1634   } while (0)
1635 #define ASSERT_ALWAYS_MPN(ptr, size)            \
1636   do {                                          \
1637     /* let whole loop go dead when no nails */  \
1638     if (GMP_NAIL_BITS != 0)                     \
1639       {                                         \
1640         mp_size_t  __i;                         \
1641         for (__i = 0; __i < (size); __i++)      \
1642           ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
1643       }                                         \
1644   } while (0)
1645 #if WANT_ASSERT
1646 #define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
1647 #define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
1648 #else
1649 #define ASSERT_LIMB(limb)       do {} while (0)
1650 #define ASSERT_MPN(ptr, size)   do {} while (0)
1651 #endif
1652
1653
1654 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
1655    size==0 is allowed, and in that case {ptr,size} considered to be zero.  */
1656 #if WANT_ASSERT
1657 #define ASSERT_MPN_ZERO_P(ptr,size)     \
1658   do {                                  \
1659     mp_size_t  __i;                     \
1660     ASSERT ((size) >= 0);               \
1661     for (__i = 0; __i < (size); __i++)  \
1662       ASSERT ((ptr)[__i] == 0);         \
1663   } while (0)
1664 #define ASSERT_MPN_NONZERO_P(ptr,size)  \
1665   do {                                  \
1666     mp_size_t  __i;                     \
1667     int        __nonzero = 0;           \
1668     ASSERT ((size) >= 0);               \
1669     for (__i = 0; __i < (size); __i++)  \
1670       if ((ptr)[__i] != 0)              \
1671         {                               \
1672           __nonzero = 1;                \
1673           break;                        \
1674         }                               \
1675     ASSERT (__nonzero);                 \
1676   } while (0)
1677 #else
1678 #define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
1679 #define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
1680 #endif
1681
1682
1683 #if HAVE_NATIVE_mpn_com_n
1684 #define mpn_com_n __MPN(com_n)
1685 void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
1686 #else
1687 #define mpn_com_n(d,s,n)                                \
1688   do {                                                  \
1689     mp_ptr     __d = (d);                               \
1690     mp_srcptr  __s = (s);                               \
1691     mp_size_t  __n = (n);                               \
1692     ASSERT (__n >= 1);                                  \
1693     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
1694     do                                                  \
1695       *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
1696     while (--__n);                                      \
1697   } while (0)
1698 #endif
1699
1700 #define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
1701   do {                                                  \
1702     mp_ptr       __d = (d);                             \
1703     mp_srcptr    __s1 = (s1);                           \
1704     mp_srcptr    __s2 = (s2);                           \
1705     mp_size_t    __n = (n);                             \
1706     ASSERT (__n >= 1);                                  \
1707     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
1708     ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
1709     do                                                  \
1710       operation;                                        \
1711     while (--__n);                                      \
1712   } while (0)
1713
1714 #if HAVE_NATIVE_mpn_and_n
1715 #define mpn_and_n __MPN(and_n)
1716 void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1717 #else
1718 #define mpn_and_n(d, s1, s2, n) \
1719   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
1720 #endif
1721
1722 #if HAVE_NATIVE_mpn_andn_n
1723 #define mpn_andn_n __MPN(andn_n)
1724 void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1725 #else
1726 #define mpn_andn_n(d, s1, s2, n) \
1727   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
1728 #endif
1729
1730 #if HAVE_NATIVE_mpn_nand_n
1731 #define mpn_nand_n __MPN(nand_n)
1732 void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1733 #else
1734 #define mpn_nand_n(d, s1, s2, n) \
1735   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)
1736 #endif
1737
1738 #if HAVE_NATIVE_mpn_ior_n
1739 #define mpn_ior_n __MPN(ior_n)
1740 void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1741 #else
1742 #define mpn_ior_n(d, s1, s2, n) \
1743   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
1744 #endif
1745
1746 #if HAVE_NATIVE_mpn_iorn_n
1747 #define mpn_iorn_n __MPN(iorn_n)
1748 void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1749 #else
1750 #define mpn_iorn_n(d, s1, s2, n) \
1751   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)
1752 #endif
1753
1754 #if HAVE_NATIVE_mpn_nior_n
1755 #define mpn_nior_n __MPN(nior_n)
1756 void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1757 #else
1758 #define mpn_nior_n(d, s1, s2, n) \
1759   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)
1760 #endif
1761
1762 #if HAVE_NATIVE_mpn_xor_n
1763 #define mpn_xor_n __MPN(xor_n)
1764 void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1765 #else
1766 #define mpn_xor_n(d, s1, s2, n) \
1767   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
1768 #endif
1769
1770 #if HAVE_NATIVE_mpn_xnor_n
1771 #define mpn_xnor_n __MPN(xnor_n)
1772 void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
1773 #else
1774 #define mpn_xnor_n(d, s1, s2, n) \
1775   MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)
1776 #endif
1777
1778
1779 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
1780 #if GMP_NAIL_BITS == 0
1781 #define ADDC_LIMB(cout, w, x, y)        \
1782   do {                                  \
1783     mp_limb_t  __x = (x);               \
1784     mp_limb_t  __y = (y);               \
1785     mp_limb_t  __w = __x + __y;         \
1786     (w) = __w;                          \
1787     (cout) = __w < __x;                 \
1788   } while (0)
1789 #else
1790 #define ADDC_LIMB(cout, w, x, y)        \
1791   do {                                  \
1792     mp_limb_t  __w;                     \
1793     ASSERT_LIMB (x);                    \
1794     ASSERT_LIMB (y);                    \
1795     __w = (x) + (y);                    \
1796     (w) = __w & GMP_NUMB_MASK;          \
1797     (cout) = __w >> GMP_NUMB_BITS;      \
1798   } while (0)
1799 #endif
1800
1801 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
1802    subtract.  */
1803 #if GMP_NAIL_BITS == 0
1804 #define SUBC_LIMB(cout, w, x, y)        \
1805   do {                                  \
1806     mp_limb_t  __x = (x);               \
1807     mp_limb_t  __y = (y);               \
1808     mp_limb_t  __w = __x - __y;         \
1809     (w) = __w;                          \
1810     (cout) = __w > __x;                 \
1811   } while (0)
1812 #else
1813 #define SUBC_LIMB(cout, w, x, y)        \
1814   do {                                  \
1815     mp_limb_t  __w = (x) - (y);         \
1816     (w) = __w & GMP_NUMB_MASK;          \
1817     (cout) = __w >> (GMP_LIMB_BITS-1);  \
1818   } while (0)
1819 #endif
1820
1821
1822 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
1823    expecting no carry (or borrow) from that.
1824
1825    The size parameter is only for the benefit of assertion checking.  In a
1826    normal build it's unused and the carry/borrow is just propagated as far
1827    as it needs to go.
1828
1829    On random data, usually only one or two limbs of {ptr,size} get updated,
1830    so there's no need for any sophisticated looping, just something compact
1831    and sensible.
1832
1833    FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
1834    declaring their operand sizes, then remove the former.  This is purely
1835    for the benefit of assertion checking.  */
1836
1837 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0      \
1838   && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
1839 /* Better flags handling than the generic C gives on i386, saving a few
1840    bytes of code and maybe a cycle or two.  */
1841
1842 #define MPN_IORD_U(ptr, incr, aors)                                     \
1843   do {                                                                  \
1844     mp_ptr  __ptr_dummy;                                                \
1845     if (__builtin_constant_p (incr) && (incr) == 1)                     \
1846       {                                                                 \
1847         __asm__ __volatile__                                            \
1848           ("\n" ASM_L(top) ":\n"                                        \
1849            "\t" aors " $1, (%0)\n"                                      \
1850            "\tleal 4(%0),%0\n"                                          \
1851            "\tjc " ASM_L(top)                                           \
1852            : "=r" (__ptr_dummy)                                         \
1853            : "0"  (ptr)                                                 \
1854            : "memory");                                                 \
1855       }                                                                 \
1856     else                                                                \
1857       {                                                                 \
1858         __asm__ __volatile__                                            \
1859           (   aors  " %2,(%0)\n"                                        \
1860            "\tjnc " ASM_L(done) "\n"                                    \
1861            ASM_L(top) ":\n"                                             \
1862            "\t" aors " $1,4(%0)\n"                                      \
1863            "\tleal 4(%0),%0\n"                                          \
1864            "\tjc " ASM_L(top) "\n"                                      \
1865            ASM_L(done) ":\n"                                            \
1866            : "=r" (__ptr_dummy)                                         \
1867            : "0"  (ptr),                                                \
1868              "ri" (incr)                                                \
1869            : "memory");                                                 \
1870       }                                                                 \
1871   } while (0)
1872
1873 #define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
1874 #define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
1875 #define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
1876 #define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
1877 #endif
1878
1879 #if GMP_NAIL_BITS == 0
1880 #ifndef mpn_incr_u
1881 #define mpn_incr_u(p,incr)                              \
1882   do {                                                  \
1883     mp_limb_t __x;                                      \
1884     mp_ptr __p = (p);                                   \
1885     if (__builtin_constant_p (incr) && (incr) == 1)     \
1886       {                                                 \
1887         while (++(*(__p++)) == 0)                       \
1888           ;                                             \
1889       }                                                 \
1890     else                                                \
1891       {                                                 \
1892         __x = *__p + (incr);                            \
1893         *__p = __x;                                     \
1894         if (__x < (incr))                               \
1895           while (++(*(++__p)) == 0)                     \
1896             ;                                           \
1897       }                                                 \
1898   } while (0)
1899 #endif
1900 #ifndef mpn_decr_u
1901 #define mpn_decr_u(p,incr)                              \
1902   do {                                                  \
1903     mp_limb_t __x;                                      \
1904     mp_ptr __p = (p);                                   \
1905     if (__builtin_constant_p (incr) && (incr) == 1)     \
1906       {                                                 \
1907         while ((*(__p++))-- == 0)                       \
1908           ;                                             \
1909       }                                                 \
1910     else                                                \
1911       {                                                 \
1912         __x = *__p;                                     \
1913         *__p = __x - (incr);                            \
1914         if (__x < (incr))                               \
1915           while ((*(++__p))-- == 0)                     \
1916             ;                                           \
1917       }                                                 \
1918   } while (0)
1919 #endif
1920 #endif
1921
1922 #if GMP_NAIL_BITS >= 1
1923 #ifndef mpn_incr_u
1924 #define mpn_incr_u(p,incr)                              \
1925   do {                                                  \
1926     mp_limb_t __x;                                      \
1927     mp_ptr __p = (p);                                   \
1928     if (__builtin_constant_p (incr) && (incr) == 1)     \
1929       {                                                 \
1930         do                                              \
1931           {                                             \
1932             __x = (*__p + 1) & GMP_NUMB_MASK;           \
1933             *__p++ = __x;                               \
1934           }                                             \
1935         while (__x == 0);                               \
1936       }                                                 \
1937     else                                                \
1938       {                                                 \
1939         __x = (*__p + (incr));                          \
1940         *__p++ = __x & GMP_NUMB_MASK;                   \
1941         if (__x >> GMP_NUMB_BITS != 0)                  \
1942           {                                             \
1943             do                                          \
1944               {                                         \
1945                 __x = (*__p + 1) & GMP_NUMB_MASK;       \
1946                 *__p++ = __x;                           \
1947               }                                         \
1948             while (__x == 0);                           \
1949           }                                             \
1950       }                                                 \
1951   } while (0)
1952 #endif
1953 #ifndef mpn_decr_u
1954 #define mpn_decr_u(p,incr)                              \
1955   do {                                                  \
1956     mp_limb_t __x;                                      \
1957     mp_ptr __p = (p);                                   \
1958     if (__builtin_constant_p (incr) && (incr) == 1)     \
1959       {                                                 \
1960         do                                              \
1961           {                                             \
1962             __x = *__p;                                 \
1963             *__p++ = (__x - 1) & GMP_NUMB_MASK;         \
1964           }                                             \
1965         while (__x == 0);                               \
1966       }                                                 \
1967     else                                                \
1968       {                                                 \
1969         __x = *__p - (incr);                            \
1970         *__p++ = __x & GMP_NUMB_MASK;                   \
1971         if (__x >> GMP_NUMB_BITS != 0)                  \
1972           {                                             \
1973             do                                          \
1974               {                                         \
1975                 __x = *__p;                             \
1976                 *__p++ = (__x - 1) & GMP_NUMB_MASK;     \
1977               }                                         \
1978             while (__x == 0);                           \
1979           }                                             \
1980       }                                                 \
1981   } while (0)
1982 #endif
1983 #endif
1984
1985 #ifndef MPN_INCR_U
1986 #if WANT_ASSERT
1987 #define MPN_INCR_U(ptr, size, n)                        \
1988   do {                                                  \
1989     ASSERT ((size) >= 1);                               \
1990     ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));     \
1991   } while (0)
1992 #else
1993 #define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
1994 #endif
1995 #endif
1996
1997 #ifndef MPN_DECR_U
1998 #if WANT_ASSERT
1999 #define MPN_DECR_U(ptr, size, n)                        \
2000   do {                                                  \
2001     ASSERT ((size) >= 1);                               \
2002     ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));     \
2003   } while (0)
2004 #else
2005 #define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
2006 #endif
2007 #endif
2008
2009
2010 /* Structure for conversion between internal binary format and
2011    strings in base 2..36.  */
2012 struct bases
2013 {
2014   /* Number of digits in the conversion base that always fits in an mp_limb_t.
2015      For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2016      is 9, since 10**9 is the largest number that fits into a mp_limb_t.  */
2017   int chars_per_limb;
2018
2019   /* log(2)/log(conversion_base) */
2020   double chars_per_bit_exactly;
2021
2022   /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2023      factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
2024      i.e. the number of bits used to represent each digit in the base.  */
2025   mp_limb_t big_base;
2026
2027   /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
2028      fixed-point number.  Instead of dividing by big_base an application can
2029      choose to multiply by big_base_inverted.  */
2030   mp_limb_t big_base_inverted;
2031 };
2032
2033 #define mp_bases __MPN(bases)
2034 #define __mp_bases __MPN(bases)
2035 __GMP_DECLSPEC extern const struct bases mp_bases[257];
2036
2037
2038 /* For power of 2 bases this is exact.  For other bases the result is either
2039    exact or one too big.
2040
2041    To be exact always it'd be necessary to examine all the limbs of the
2042    operand, since numbers like 100..000 and 99...999 generally differ only
2043    in the lowest limb.  It'd be possible to examine just a couple of high
2044    limbs to increase the probability of being exact, but that doesn't seem
2045    worth bothering with.  */
2046
2047 #define MPN_SIZEINBASE(result, ptr, size, base)                         \
2048   do {                                                                  \
2049     int       __lb_base, __cnt;                                         \
2050     size_t __totbits;                                                   \
2051                                                                         \
2052     ASSERT ((size) >= 0);                                               \
2053     ASSERT ((base) >= 2);                                               \
2054     ASSERT ((base) < numberof (mp_bases));                              \
2055                                                                         \
2056     /* Special case for X == 0.  */                                     \
2057     if ((size) == 0)                                                    \
2058       (result) = 1;                                                     \
2059     else                                                                \
2060       {                                                                 \
2061         /* Calculate the total number of significant bits of X.  */     \
2062         count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
2063         __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2064                                                                         \
2065         if (POW2_P (base))                                              \
2066           {                                                             \
2067             __lb_base = mp_bases[base].big_base;                        \
2068             (result) = (__totbits + __lb_base - 1) / __lb_base;         \
2069           }                                                             \
2070         else                                                            \
2071           (result) = (size_t)                                           \
2072             (__totbits * mp_bases[base].chars_per_bit_exactly) + 1;     \
2073       }                                                                 \
2074   } while (0)
2075
2076 /* eliminate mp_bases lookups for base==16 */
2077 #define MPN_SIZEINBASE_16(result, ptr, size)                            \
2078   do {                                                                  \
2079     int       __cnt;                                                    \
2080     mp_size_t __totbits;                                                \
2081                                                                         \
2082     ASSERT ((size) >= 0);                                               \
2083                                                                         \
2084     /* Special case for X == 0.  */                                     \
2085     if ((size) == 0)                                                    \
2086       (result) = 1;                                                     \
2087     else                                                                \
2088       {                                                                 \
2089         /* Calculate the total number of significant bits of X.  */     \
2090         count_leading_zeros (__cnt, (ptr)[(size)-1]);                   \
2091         __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2092         (result) = (__totbits + 4 - 1) / 4;                             \
2093       }                                                                 \
2094   } while (0)
2095
2096 /* bit count to limb count, rounding up */
2097 #define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2098
2099 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui.  MPZ_FAKE_UI creates fake
2100    mpz_t from ui.  The zp argument must have room for LIMBS_PER_ULONG limbs
2101    in both cases (LIMBS_PER_ULONG is also defined here.) */
2102 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2103
2104 #define LIMBS_PER_ULONG 1
2105 #define MPN_SET_UI(zp, zn, u)   \
2106   (zp)[0] = (u);                \
2107   (zn) = ((zp)[0] != 0);
2108 #define MPZ_FAKE_UI(z, zp, u)   \
2109   (zp)[0] = (u);                \
2110   PTR (z) = (zp);               \
2111   SIZ (z) = ((zp)[0] != 0);     \
2112   ASSERT_CODE (ALLOC (z) = 1);
2113
2114 #else /* need two limbs per ulong */
2115
2116 #define LIMBS_PER_ULONG 2
2117 #define MPN_SET_UI(zp, zn, u)                          \
2118   (zp)[0] = (u) & GMP_NUMB_MASK;                       \
2119   (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
2120   (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2121 #define MPZ_FAKE_UI(z, zp, u)                          \
2122   (zp)[0] = (u) & GMP_NUMB_MASK;                       \
2123   (zp)[1] = (u) >> GMP_NUMB_BITS;                      \
2124   SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2125   PTR (z) = (zp);                                      \
2126   ASSERT_CODE (ALLOC (z) = 2);
2127
2128 #endif
2129
2130
2131 #if HAVE_HOST_CPU_FAMILY_x86
2132 #define TARGET_REGISTER_STARVED 1
2133 #else
2134 #define TARGET_REGISTER_STARVED 0
2135 #endif
2136
2137
2138 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2139    or 0 there into a limb 0xFF..FF or 0 respectively.
2140
2141    On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2142    but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2143    a little compile-time test and a fallback to a "? :" form.  The latter is
2144    necessary for instance on Cray vector systems.
2145
2146    Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2147    to an arithmetic right shift anyway, but it's good to get the desired
2148    shift on past versions too (in particular since an important use of
2149    LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */
2150
2151 #define LIMB_HIGHBIT_TO_MASK(n)                                 \
2152   (((mp_limb_signed_t) -1 >> 1) < 0                             \
2153    ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)              \
2154    : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
2155
2156
2157 /* Use a library function for invert_limb, if available. */
2158 #define mpn_invert_limb  __MPN(invert_limb)
2159 mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
2160 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2161 #define invert_limb(invxl,xl)           \
2162   do {                                  \
2163     (invxl) = mpn_invert_limb (xl);     \
2164   } while (0)
2165 #endif
2166
2167 #ifndef invert_limb
2168 #define invert_limb(invxl,xl)                   \
2169   do {                                          \
2170     mp_limb_t dummy;                            \
2171     ASSERT ((xl) != 0);                         \
2172     udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl);  \
2173   } while (0)
2174 #endif
2175
2176 #ifndef udiv_qrnnd_preinv
2177 #define udiv_qrnnd_preinv udiv_qrnnd_preinv2
2178 #endif
2179
2180 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
2181    limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
2182    If this would yield overflow, DI should be the largest possible number
2183    (i.e., only ones).  For correct operation, the most significant bit of D
2184    has to be set.  Put the quotient in Q and the remainder in R.  */
2185 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di)                         \
2186   do {                                                                  \
2187     mp_limb_t _q, _ql, _r;                                              \
2188     mp_limb_t _xh, _xl;                                                 \
2189     ASSERT ((d) != 0);                                                  \
2190     umul_ppmm (_q, _ql, (nh), (di));                                    \
2191     _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */      \
2192     umul_ppmm (_xh, _xl, _q, (d));                                      \
2193     sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl);                         \
2194     if (_xh != 0)                                                       \
2195       {                                                                 \
2196         sub_ddmmss (_xh, _r, _xh, _r, 0, (d));                          \
2197         _q += 1;                                                        \
2198         if (_xh != 0)                                                   \
2199           {                                                             \
2200             _r -= (d);                                                  \
2201             _q += 1;                                                    \
2202           }                                                             \
2203       }                                                                 \
2204     if (_r >= (d))                                                      \
2205       {                                                                 \
2206         _r -= (d);                                                      \
2207         _q += 1;                                                        \
2208       }                                                                 \
2209     (r) = _r;                                                           \
2210     (q) = _q;                                                           \
2211   } while (0)
2212
2213 /* Like udiv_qrnnd_preinv, but branch-free. */
2214 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di)                         \
2215   do {                                                                  \
2216     mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                            \
2217     mp_limb_t _xh, _xl;                                                 \
2218     _n2 = (nh);                                                         \
2219     _n10 = (nl);                                                        \
2220     _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                               \
2221     _nadj = _n10 + (_nmask & (d));                                      \
2222     umul_ppmm (_xh, _xl, di, _n2 - _nmask);                             \
2223     add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                        \
2224     _q1 = ~_xh;                                                         \
2225     umul_ppmm (_xh, _xl, _q1, d);                                       \
2226     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
2227     _xh -= (d);                                 /* xh = 0 or -1 */      \
2228     (r) = _xl + ((d) & _xh);                                            \
2229     (q) = _xh - _q1;                                                    \
2230   } while (0)
2231
2232 /* Like udiv_qrnnd_preinv2, but for for any value D.  DNORM is D shifted left
2233    so that its most significant bit is set.  LGUP is ceil(log2(D)).  */
2234 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
2235   do {                                                                  \
2236     mp_limb_t _n2, _n10, _nmask, _nadj, _q1;                            \
2237     mp_limb_t _xh, _xl;                                                 \
2238     _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
2239     _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup));                         \
2240     _nmask = LIMB_HIGHBIT_TO_MASK (_n10);                               \
2241     _nadj = _n10 + (_nmask & (dnorm));                                  \
2242     umul_ppmm (_xh, _xl, di, _n2 - _nmask);                             \
2243     add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj);                        \
2244     _q1 = ~_xh;                                                         \
2245     umul_ppmm (_xh, _xl, _q1, d);                                       \
2246     add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl);                            \
2247     _xh -= (d);                                                         \
2248     (r) = _xl + ((d) & _xh);                                            \
2249     (q) = _xh - _q1;                                                    \
2250   } while (0)
2251
2252
2253 #ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
2254 #define mpn_preinv_divrem_1  __MPN(preinv_divrem_1)
2255 mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
2256 #endif
2257
2258
2259 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
2260    the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
2261    mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
2262    the few CISC chips where preinv is not good have defines saying so.  */
2263 #ifndef USE_PREINV_DIVREM_1
2264 #define USE_PREINV_DIVREM_1   1
2265 #endif
2266 #ifndef USE_PREINV_MOD_1
2267 #define USE_PREINV_MOD_1   1
2268 #endif
2269
2270 #if USE_PREINV_DIVREM_1
2271 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
2272   mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
2273 #else
2274 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
2275   mpn_divrem_1 (qp, xsize, ap, size, d)
2276 #endif
2277
2278 #if USE_PREINV_MOD_1
2279 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
2280   mpn_preinv_mod_1 (src, size, divisor, inverse)
2281 #else
2282 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)       \
2283   mpn_mod_1 (src, size, divisor)
2284 #endif
2285
2286
2287 #ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
2288 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
2289 mp_limb_t mpn_mod_34lsub1 _PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
2290 #endif
2291
2292
2293 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2294    plain mpn_divrem_1.  Likewise MODEXACT_1_ODD_THRESHOLD for
2295    mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
2296    modexact are faster at all sizes, so the defaults are 0.  Those CPUs
2297    where this is not right have a tuned threshold.  */
2298 #ifndef DIVEXACT_1_THRESHOLD
2299 #define DIVEXACT_1_THRESHOLD  0
2300 #endif
2301 #ifndef MODEXACT_1_ODD_THRESHOLD
2302 #define MODEXACT_1_ODD_THRESHOLD  0
2303 #endif
2304
2305 #ifndef mpn_divexact_1  /* if not done with cpuvec in a fat binary */
2306 #define mpn_divexact_1 __MPN(divexact_1)
2307 void    mpn_divexact_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
2308 #endif
2309
2310 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor)                     \
2311   do {                                                                        \
2312     if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD))                         \
2313       ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
2314     else                                                                      \
2315       {                                                                       \
2316         ASSERT (mpn_mod_1 (src, size, divisor) == 0);                         \
2317         mpn_divexact_1 (dst, src, size, divisor);                             \
2318       }                                                                       \
2319   } while (0)
2320
2321 #ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
2322 #define mpn_modexact_1c_odd  __MPN(modexact_1c_odd)
2323 mp_limb_t mpn_modexact_1c_odd _PROTO ((mp_srcptr src, mp_size_t size,
2324                                        mp_limb_t divisor, mp_limb_t c)) __GMP_ATTRIBUTE_PURE;
2325 #endif
2326
2327 #if HAVE_NATIVE_mpn_modexact_1_odd
2328 #define mpn_modexact_1_odd   __MPN(modexact_1_odd)
2329 mp_limb_t mpn_modexact_1_odd _PROTO ((mp_srcptr src, mp_size_t size,
2330                                       mp_limb_t divisor)) __GMP_ATTRIBUTE_PURE;
2331 #else
2332 #define mpn_modexact_1_odd(src,size,divisor) \
2333   mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
2334 #endif
2335
2336 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)                     \
2337   (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD)                     \
2338    ? mpn_modexact_1_odd (src, size, divisor)                            \
2339    : mpn_mod_1 (src, size, divisor))
2340
2341
2342 /* modlimb_invert() sets inv to the multiplicative inverse of n modulo
2343    2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2344    n must be odd (otherwise such an inverse doesn't exist).
2345
2346    This is not to be confused with invert_limb(), which is completely
2347    different.
2348
2349    The table lookup gives an inverse with the low 8 bits valid, and each
2350    multiply step doubles the number of bits.  See Jebelean "An algorithm for
2351    exact division" end of section 4 (reference in gmp.texi).
2352
2353    Possible enhancement: Could use UHWtype until the last step, if half-size
2354    multiplies are faster (might help under _LONG_LONG_LIMB).
2355
2356    Alternative: As noted in Granlund and Montgomery "Division by Invariant
2357    Integers using Multiplication" (reference in gmp.texi), n itself gives a
2358    3-bit inverse immediately, and could be used instead of a table lookup.
2359    A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2360    bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
2361
2362 #define modlimb_invert_table  __gmp_modlimb_invert_table
2363 __GMP_DECLSPEC extern const unsigned char  modlimb_invert_table[128];
2364
2365 #define modlimb_invert(inv,n)                                           \
2366   do {                                                                  \
2367     mp_limb_t  __n = (n);                                               \
2368     mp_limb_t  __inv;                                                   \
2369     ASSERT ((__n & 1) == 1);                                            \
2370                                                                         \
2371     __inv = modlimb_invert_table[(__n/2) & 0x7F]; /*  8 */              \
2372     if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;   \
2373     if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;   \
2374     if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;   \
2375                                                                         \
2376     if (GMP_NUMB_BITS > 64)                                             \
2377       {                                                                 \
2378         int  __invbits = 64;                                            \
2379         do {                                                            \
2380           __inv = 2 * __inv - __inv * __inv * __n;                      \
2381           __invbits *= 2;                                               \
2382         } while (__invbits < GMP_NUMB_BITS);                            \
2383       }                                                                 \
2384                                                                         \
2385     ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);                        \
2386     (inv) = __inv & GMP_NUMB_MASK;                                      \
2387   } while (0)
2388
2389 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2390    Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2391    GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2392    we need to start from GMP_NUMB_MAX>>1. */
2393 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2394
2395 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2396    These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2397 #define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
2398 #define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
2399
2400
2401 /* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
2402
2403    It's not clear whether this is the best way to do this calculation.
2404    Anything congruent to -a would be fine for the one limb congruence
2405    tests.  */
2406
2407 #define NEG_MOD(r, a, d)                                                \
2408   do {                                                                  \
2409     ASSERT ((d) != 0);                                                  \
2410     ASSERT_LIMB (a);                                                    \
2411     ASSERT_LIMB (d);                                                    \
2412                                                                         \
2413     if ((a) <= (d))                                                     \
2414       {                                                                 \
2415         /* small a is reasonably likely */                              \
2416         (r) = (d) - (a);                                                \
2417       }                                                                 \
2418     else                                                                \
2419       {                                                                 \
2420         unsigned   __twos;                                              \
2421         mp_limb_t  __dnorm;                                             \
2422         count_leading_zeros (__twos, d);                                \
2423         __twos -= GMP_NAIL_BITS;                                        \
2424         __dnorm = (d) << __twos;                                        \
2425         (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);             \
2426       }                                                                 \
2427                                                                         \
2428     ASSERT_LIMB (r);                                                    \
2429   } while (0)
2430
2431 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2432 #define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
2433
2434
2435 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
2436    to 0 if there's an even number.  "n" should be an unsigned long and "p"
2437    an int.  */
2438
2439 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2440 #define ULONG_PARITY(p, n)                                              \
2441   do {                                                                  \
2442     int __p;                                                            \
2443     __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));                    \
2444     (p) = __p & 1;                                                      \
2445   } while (0)
2446 #endif
2447
2448 /* Cray intrinsic _popcnt. */
2449 #ifdef _CRAY
2450 #define ULONG_PARITY(p, n)      \
2451   do {                          \
2452     (p) = _popcnt (n) & 1;      \
2453   } while (0)
2454 #endif
2455
2456 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2457     && ! defined (NO_ASM) && defined (__ia64)
2458 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
2459    to a 64 bit unsigned long long for popcnt */
2460 #define ULONG_PARITY(p, n)                                              \
2461   do {                                                                  \
2462     unsigned long long  __n = (unsigned long) (n);                      \
2463     int  __p;                                                           \
2464     __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));                \
2465     (p) = __p & 1;                                                      \
2466   } while (0)
2467 #endif
2468
2469 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
2470 #if __GMP_GNUC_PREREQ (3,1)
2471 #define __GMP_qm "=Qm"
2472 #define __GMP_q "=Q"
2473 #else
2474 #define __GMP_qm "=qm"
2475 #define __GMP_q "=q"
2476 #endif
2477 #define ULONG_PARITY(p, n)                                              \
2478   do {                                                                  \
2479     char           __p;                                                 \
2480     unsigned long  __n = (n);                                           \
2481     __n ^= (__n >> 16);                                                 \
2482     __asm__ ("xorb %h1, %b1\n\t"                                        \
2483              "setpo %0"                                                 \
2484          : __GMP_qm (__p), __GMP_q (__n)                                \
2485          : "1" (__n));                                                  \
2486     (p) = __p;                                                          \
2487   } while (0)
2488 #endif
2489
2490 #if ! defined (ULONG_PARITY)
2491 #define ULONG_PARITY(p, n)                                              \
2492   do {                                                                  \
2493     unsigned long  __n = (n);                                           \
2494     int  __p = 0;                                                       \
2495     do                                                                  \
2496       {                                                                 \
2497         __p ^= 0x96696996L >> (__n & 0x1F);                             \
2498         __n >>= 5;                                                      \
2499       }                                                                 \
2500     while (__n != 0);                                                   \
2501                                                                         \
2502     (p) = __p & 1;                                                      \
2503   } while (0)
2504 #endif
2505
2506
2507 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
2508    version 3.1 at least) doesn't seem to know how to generate rlwimi for
2509    anything other than bit-fields, so use "asm".  */
2510 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
2511   && HAVE_HOST_CPU_FAMILY_powerpc && BITS_PER_MP_LIMB == 32
2512 #define BSWAP_LIMB(dst, src)                                            \
2513   do {                                                                  \
2514     mp_limb_t  __bswapl_src = (src);                                    \
2515     mp_limb_t  __tmp1 = __bswapl_src >> 24;             /* low byte */  \
2516     mp_limb_t  __tmp2 = __bswapl_src << 24;             /* high byte */ \
2517     __asm__ ("rlwimi %0, %2, 24, 16, 23"                /* 2nd low */   \
2518          : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));           \
2519     __asm__ ("rlwimi %0, %2,  8,  8, 15"                /* 3nd high */  \
2520          : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));           \
2521     (dst) = __tmp1 | __tmp2;                            /* whole */     \
2522   } while (0)
2523 #endif
2524
2525 /* bswap is available on i486 and up and is fast.  A combination rorw $8 /
2526    roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
2527    kernel with xchgb instead of rorw), but this is not done here, because
2528    i386 means generic x86 and mixing word and dword operations will cause
2529    partial register stalls on P6 chips.  */
2530 #if defined (__GNUC__) && ! defined (NO_ASM)            \
2531   && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
2532   && BITS_PER_MP_LIMB == 32
2533 #define BSWAP_LIMB(dst, src)                                            \
2534   do {                                                                  \
2535     __asm__ ("bswap %0" : "=r" (dst) : "0" (src));                      \
2536   } while (0)
2537 #endif
2538
2539 #if defined (__GNUC__) && ! defined (NO_ASM)            \
2540   && defined (__amd64__) && BITS_PER_MP_LIMB == 64
2541 #define BSWAP_LIMB(dst, src)                                            \
2542   do {                                                                  \
2543     __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));                     \
2544   } while (0)
2545 #endif
2546
2547 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2548     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
2549 #define BSWAP_LIMB(dst, src)                                            \
2550   do {                                                                  \
2551     __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));           \
2552   } while (0)
2553 #endif
2554
2555 /* As per glibc. */
2556 #if defined (__GNUC__) && ! defined (NO_ASM)                    \
2557   && HAVE_HOST_CPU_FAMILY_m68k && BITS_PER_MP_LIMB == 32
2558 #define BSWAP_LIMB(dst, src)                                            \
2559   do {                                                                  \
2560     mp_limb_t  __bswapl_src = (src);                                    \
2561     __asm__ ("ror%.w %#8, %0\n\t"                                       \
2562              "swap   %0\n\t"                                            \
2563              "ror%.w %#8, %0"                                           \
2564              : "=d" (dst)                                               \
2565              : "0" (__bswapl_src));                                     \
2566   } while (0)
2567 #endif
2568
2569 #if ! defined (BSWAP_LIMB)
2570 #if BITS_PER_MP_LIMB == 8
2571 #define BSWAP_LIMB(dst, src)            \
2572   do { (dst) = (src); } while (0)
2573 #endif
2574 #if BITS_PER_MP_LIMB == 16
2575 #define BSWAP_LIMB(dst, src)                    \
2576   do {                                          \
2577     (dst) = ((src) << 8) + ((src) >> 8);        \
2578   } while (0)
2579 #endif
2580 #if BITS_PER_MP_LIMB == 32
2581 #define BSWAP_LIMB(dst, src)    \
2582   do {                          \
2583     (dst) =                     \
2584       ((src) << 24)             \
2585       + (((src) & 0xFF00) << 8) \
2586       + (((src) >> 8) & 0xFF00) \
2587       + ((src) >> 24);          \
2588   } while (0)
2589 #endif
2590 #if BITS_PER_MP_LIMB == 64
2591 #define BSWAP_LIMB(dst, src)            \
2592   do {                                  \
2593     (dst) =                             \
2594       ((src) << 56)                     \
2595       + (((src) & 0xFF00) << 40)        \
2596       + (((src) & 0xFF0000) << 24)      \
2597       + (((src) & 0xFF000000) << 8)     \
2598       + (((src) >> 8) & 0xFF000000)     \
2599       + (((src) >> 24) & 0xFF0000)      \
2600       + (((src) >> 40) & 0xFF00)        \
2601       + ((src) >> 56);                  \
2602   } while (0)
2603 #endif
2604 #endif
2605
2606 #if ! defined (BSWAP_LIMB)
2607 #define BSWAP_LIMB(dst, src)                            \
2608   do {                                                  \
2609     mp_limb_t  __bswapl_src = (src);                    \
2610     mp_limb_t  __dst = 0;                               \
2611     int        __i;                                     \
2612     for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++)       \
2613       {                                                 \
2614         __dst = (__dst << 8) | (__bswapl_src & 0xFF);   \
2615         __bswapl_src >>= 8;                             \
2616       }                                                 \
2617     (dst) = __dst;                                      \
2618   } while (0)
2619 #endif
2620
2621
2622 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
2623    those we know are fast.  */
2624 #if defined (__GNUC__) && ! defined (NO_ASM)                            \
2625   && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN                     \
2626   && (HAVE_HOST_CPU_powerpc604                                          \
2627       || HAVE_HOST_CPU_powerpc604e                                      \
2628       || HAVE_HOST_CPU_powerpc750                                       \
2629       || HAVE_HOST_CPU_powerpc7400)
2630 #define BSWAP_LIMB_FETCH(limb, src)                                     \
2631   do {                                                                  \
2632     mp_srcptr  __blf_src = (src);                                       \
2633     mp_limb_t  __limb;                                                  \
2634     __asm__ ("lwbrx %0, 0, %1"                                          \
2635              : "=r" (__limb)                                            \
2636              : "r" (__blf_src),                                         \
2637                "m" (*__blf_src));                                       \
2638     (limb) = __limb;                                                    \
2639   } while (0)
2640 #endif
2641
2642 #if ! defined (BSWAP_LIMB_FETCH)
2643 #define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
2644 #endif
2645
2646
2647 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
2648    know are fast.  FIXME: Is this necessary?  */
2649 #if defined (__GNUC__) && ! defined (NO_ASM)                            \
2650   && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN                     \
2651   && (HAVE_HOST_CPU_powerpc604                                          \
2652       || HAVE_HOST_CPU_powerpc604e                                      \
2653       || HAVE_HOST_CPU_powerpc750                                       \
2654       || HAVE_HOST_CPU_powerpc7400)
2655 #define BSWAP_LIMB_STORE(dst, limb)                                     \
2656   do {                                                                  \
2657     mp_ptr     __dst = (dst);                                           \
2658     mp_limb_t  __limb = (limb);                                         \
2659     __asm__ ("stwbrx %1, 0, %2"                                         \
2660              : "=m" (*__dst)                                            \
2661              : "r" (__limb),                                            \
2662                "r" (__dst));                                            \
2663   } while (0)
2664 #endif
2665
2666 #if ! defined (BSWAP_LIMB_STORE)
2667 #define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
2668 #endif
2669
2670
2671 /* Byte swap limbs from {src,size} and store at {dst,size}. */
2672 #define MPN_BSWAP(dst, src, size)                       \
2673   do {                                                  \
2674     mp_ptr     __dst = (dst);                           \
2675     mp_srcptr  __src = (src);                           \
2676     mp_size_t  __size = (size);                         \
2677     mp_size_t  __i;                                     \
2678     ASSERT ((size) >= 0);                               \
2679     ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));   \
2680     CRAY_Pragma ("_CRI ivdep");                         \
2681     for (__i = 0; __i < __size; __i++)                  \
2682       {                                                 \
2683         BSWAP_LIMB_FETCH (*__dst, __src);               \
2684         __dst++;                                        \
2685         __src++;                                        \
2686       }                                                 \
2687   } while (0)
2688
2689 /* Byte swap limbs from {dst,size} and store in reverse order at {src,size}. */
2690 #define MPN_BSWAP_REVERSE(dst, src, size)               \
2691   do {                                                  \
2692     mp_ptr     __dst = (dst);                           \
2693     mp_size_t  __size = (size);                         \
2694     mp_srcptr  __src = (src) + __size - 1;              \
2695     mp_size_t  __i;                                     \
2696     ASSERT ((size) >= 0);                               \
2697     ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
2698     CRAY_Pragma ("_CRI ivdep");                         \
2699     for (__i = 0; __i < __size; __i++)                  \
2700       {                                                 \
2701         BSWAP_LIMB_FETCH (*__dst, __src);               \
2702         __dst++;                                        \
2703         __src--;                                        \
2704       }                                                 \
2705   } while (0)
2706
2707
2708 /* No processor claiming to be SPARC v9 compliant seems to
2709    implement the POPC instruction.  Disable pattern for now.  */
2710 #if 0
2711 #if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
2712 #define popc_limb(result, input)                                        \
2713   do {                                                                  \
2714     DItype __res;                                                       \
2715     __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));              \
2716   } while (0)
2717 #endif
2718 #endif
2719
2720 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2721 #define popc_limb(result, input)                                        \
2722   do {                                                                  \
2723     __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));             \
2724   } while (0)
2725 #endif
2726
2727 /* Cray intrinsic. */
2728 #ifdef _CRAY
2729 #define popc_limb(result, input)        \
2730   do {                                  \
2731     (result) = _popcnt (input);         \
2732   } while (0)
2733 #endif
2734
2735 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)                  \
2736     && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
2737 #define popc_limb(result, input)                                        \
2738   do {                                                                  \
2739     __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));           \
2740   } while (0)
2741 #endif
2742
2743 /* Cool population count of an mp_limb_t.
2744    You have to figure out how this works, We won't tell you!
2745
2746    The constants could also be expressed as:
2747      0x55... = [2^N / 3]     = [(2^N-1)/3]
2748      0x33... = [2^N / 5]     = [(2^N-1)/5]
2749      0x0f... = [2^N / 17]    = [(2^N-1)/17]
2750      (N is GMP_LIMB_BITS, [] denotes truncation.) */
2751
2752 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
2753 #define popc_limb(result, input)                                        \
2754   do {                                                                  \
2755     mp_limb_t  __x = (input);                                           \
2756     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
2757     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
2758     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
2759     (result) = __x & 0xff;                                              \
2760   } while (0)
2761 #endif
2762
2763 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
2764 #define popc_limb(result, input)                                        \
2765   do {                                                                  \
2766     mp_limb_t  __x = (input);                                           \
2767     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
2768     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
2769     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
2770     if (GMP_LIMB_BITS > 8)                                              \
2771       __x = ((__x >> 8) + __x);                                         \
2772     (result) = __x & 0xff;                                              \
2773   } while (0)
2774 #endif
2775
2776 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
2777 #define popc_limb(result, input)                                        \
2778   do {                                                                  \
2779     mp_limb_t  __x = (input);                                           \
2780     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
2781     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
2782     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
2783     if (GMP_LIMB_BITS > 8)                                              \
2784       __x = ((__x >> 8) + __x);                                         \
2785     if (GMP_LIMB_BITS > 16)                                             \
2786       __x = ((__x >> 16) + __x);                                        \
2787     (result) = __x & 0xff;                                              \
2788   } while (0)
2789 #endif
2790
2791 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
2792 #define popc_limb(result, input)                                        \
2793   do {                                                                  \
2794     mp_limb_t  __x = (input);                                           \
2795     __x -= (__x >> 1) & MP_LIMB_T_MAX/3;                                \
2796     __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);     \
2797     __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;                        \
2798     if (GMP_LIMB_BITS > 8)                                              \
2799       __x = ((__x >> 8) + __x);                                         \
2800     if (GMP_LIMB_BITS > 16)                                             \
2801       __x = ((__x >> 16) + __x);                                        \
2802     if (GMP_LIMB_BITS > 32)                                             \
2803       __x = ((__x >> 32) + __x);                                        \
2804     (result) = __x & 0xff;                                              \
2805   } while (0)
2806 #endif
2807
2808
2809 /* Define stuff for longlong.h.  */
2810 #if HAVE_ATTRIBUTE_MODE
2811 typedef unsigned int UQItype    __attribute__ ((mode (QI)));
2812 typedef          int SItype     __attribute__ ((mode (SI)));
2813 typedef unsigned int USItype    __attribute__ ((mode (SI)));
2814 typedef          int DItype     __attribute__ ((mode (DI)));
2815 typedef unsigned int UDItype    __attribute__ ((mode (DI)));
2816 #else
2817 typedef unsigned char UQItype;
2818 typedef          long SItype;
2819 typedef unsigned long USItype;
2820 #if HAVE_LONG_LONG
2821 typedef long long int DItype;
2822 typedef unsigned long long int UDItype;
2823 #else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
2824 typedef long int DItype;
2825 typedef unsigned long int UDItype;
2826 #endif
2827 #endif
2828
2829 typedef mp_limb_t UWtype;
2830 typedef unsigned int UHWtype;
2831 #define W_TYPE_SIZE BITS_PER_MP_LIMB
2832
2833 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
2834
2835    Bit field packing is "implementation defined" according to C99, which
2836    leaves us at the compiler's mercy here.  For some systems packing is
2837    defined in the ABI (eg. x86).  In any case so far it seems universal that
2838    little endian systems pack from low to high, and big endian from high to
2839    low within the given type.
2840
2841    Within the fields we rely on the integer endianness being the same as the
2842    float endianness, this is true everywhere we know of and it'd be a fairly
2843    strange system that did anything else.  */
2844
2845 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
2846 #define _GMP_IEEE_FLOATS 1
2847 union ieee_double_extract
2848 {
2849   struct
2850     {
2851       gmp_uint_least32_t manh:20;
2852       gmp_uint_least32_t exp:11;
2853       gmp_uint_least32_t sig:1;
2854       gmp_uint_least32_t manl:32;
2855     } s;
2856   double d;
2857 };
2858 #endif
2859
2860 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
2861 #define _GMP_IEEE_FLOATS 1
2862 union ieee_double_extract
2863 {
2864   struct
2865     {
2866       gmp_uint_least32_t manl:32;
2867       gmp_uint_least32_t manh:20;
2868       gmp_uint_least32_t exp:11;
2869       gmp_uint_least32_t sig:1;
2870     } s;
2871   double d;
2872 };
2873 #endif
2874
2875 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
2876 #define _GMP_IEEE_FLOATS 1
2877 union ieee_double_extract
2878 {
2879   struct
2880     {
2881       gmp_uint_least32_t sig:1;
2882       gmp_uint_least32_t exp:11;
2883       gmp_uint_least32_t manh:20;
2884       gmp_uint_least32_t manl:32;
2885     } s;
2886   double d;
2887 };
2888 #endif
2889
2890
2891 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
2892    that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
2893 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
2894 /* Maximum number of limbs it will take to store any `double'.
2895    We assume doubles have 53 mantissam bits.  */
2896 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1)
2897
2898 int __gmp_extract_double _PROTO ((mp_ptr, double));
2899
2900 #define mpn_get_d __gmpn_get_d
2901 double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
2902
2903
2904 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
2905    a_inf if x is an infinity.  Both are considered unlikely values, for
2906    branch prediction.  */
2907
2908 #if _GMP_IEEE_FLOATS
2909 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
2910   do {                                          \
2911     union ieee_double_extract  u;               \
2912     u.d = (x);                                  \
2913     if (UNLIKELY (u.s.exp == 0x7FF))            \
2914       {                                         \
2915         if (u.s.manl == 0 && u.s.manh == 0)     \
2916           { a_inf; }                            \
2917         else                                    \
2918           { a_nan; }                            \
2919       }                                         \
2920   } while (0)
2921 #endif
2922
2923 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
2924 /* no nans or infs in these formats */
2925 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
2926   do { } while (0)
2927 #endif
2928
2929 #ifndef DOUBLE_NAN_INF_ACTION
2930 /* Unknown format, try something generic.
2931    NaN should be "unordered", so x!=x.
2932    Inf should be bigger than DBL_MAX.  */
2933 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)                  \
2934   do {                                                          \
2935     {                                                           \
2936       if (UNLIKELY ((x) != (x)))                                \
2937         { a_nan; }                                              \
2938       else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))      \
2939         { a_inf; }                                              \
2940     }                                                           \
2941   } while (0)
2942 #endif
2943
2944 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
2945    in the coprocessor, which means a bigger exponent range than normal, and
2946    depending on the rounding mode, a bigger mantissa than normal.  (See
2947    "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
2948    "d" through memory to force any rounding and overflows to occur.
2949
2950    On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
2951    registers, where there's no such extra precision and no need for the
2952    FORCE_DOUBLE.  We don't bother to detect this since the present uses for
2953    FORCE_DOUBLE are only in test programs and default generic C code.
2954
2955    Not quite sure that an "automatic volatile" will use memory, but it does
2956    in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
2957    apparently matching operands like "0" are only allowed on a register
2958    output.  gcc 3.4 warns about this, though in fact it and past versions
2959    seem to put the operand through memory as hoped.  */
2960
2961 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
2962      || defined (__amd64__))
2963 #define FORCE_DOUBLE(d) \
2964   do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
2965 #else
2966 #define FORCE_DOUBLE(d)  do { } while (0)
2967 #endif
2968
2969
2970 extern int __gmp_junk;
2971 extern const int __gmp_0;
2972 void __gmp_exception _PROTO ((int)) ATTRIBUTE_NORETURN;
2973 void __gmp_divide_by_zero _PROTO ((void)) ATTRIBUTE_NORETURN;
2974 void __gmp_sqrt_of_negative _PROTO ((void)) ATTRIBUTE_NORETURN;
2975 void __gmp_invalid_operation _PROTO ((void)) ATTRIBUTE_NORETURN;
2976 #define GMP_ERROR(code)   __gmp_exception (code)
2977 #define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
2978 #define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
2979
2980 #if defined _LONG_LONG_LIMB
2981 #if __GMP_HAVE_TOKEN_PASTE
2982 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
2983 #else
2984 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
2985 #endif
2986 #else /* not _LONG_LONG_LIMB */
2987 #if __GMP_HAVE_TOKEN_PASTE
2988 #define CNST_LIMB(C) ((mp_limb_t) C##L)
2989 #else
2990 #define CNST_LIMB(C) ((mp_limb_t) C/**/L)
2991 #endif
2992 #endif /* _LONG_LONG_LIMB */
2993
2994 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
2995 #if GMP_NUMB_BITS == 2
2996 #define PP 0x3                                  /* 3 */
2997 #define PP_FIRST_OMITTED 5
2998 #endif
2999 #if GMP_NUMB_BITS == 4
3000 #define PP 0xF                                  /* 3 x 5 */
3001 #define PP_FIRST_OMITTED 7
3002 #endif
3003 #if GMP_NUMB_BITS == 8
3004 #define PP 0x69                                 /* 3 x 5 x 7 */
3005 #define PP_FIRST_OMITTED 11
3006 #endif
3007 #if GMP_NUMB_BITS == 16
3008 #define PP 0x3AA7                               /* 3 x 5 x 7 x 11 x 13 */
3009 #define PP_FIRST_OMITTED 17
3010 #endif
3011 #if GMP_NUMB_BITS == 32
3012 #define PP 0xC0CFD797L                          /* 3 x 5 x 7 x 11 x ... x 29 */
3013 #define PP_INVERTED 0x53E5645CL
3014 #define PP_FIRST_OMITTED 31
3015 #endif
3016 #if GMP_NUMB_BITS == 64
3017 #define PP CNST_LIMB(0xE221F97C30E94E1D)        /* 3 x 5 x 7 x 11 x ... x 53 */
3018 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3019 #define PP_FIRST_OMITTED 59
3020 #endif
3021 #ifndef PP_FIRST_OMITTED
3022 #define PP_FIRST_OMITTED 3
3023 #endif
3024
3025
3026
3027 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3028    zero bit representing +1 and a one bit representing -1.  Bits other than
3029    bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
3030    used to ensure the expressions are "int"s even if a and/or b might be
3031    other types.
3032
3033    JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3034    and their speed is important.  Expressions are used rather than
3035    conditionals to accumulate sign changes, which effectively means XORs
3036    instead of conditional JUMPs. */
3037
3038 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3039 #define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
3040
3041 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3042 #define JACOBI_U0(a)   ((a) == 1)
3043
3044 /* (a/0), with a given by low and size;
3045    is 1 if a=+/-1, 0 otherwise */
3046 #define JACOBI_LS0(alow,asize) \
3047   (((asize) == 1 || (asize) == -1) && (alow) == 1)
3048
3049 /* (a/0), with a an mpz_t;
3050    fetch of low limb always valid, even if size is zero */
3051 #define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
3052
3053 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3054 #define JACOBI_0U(b)   ((b) == 1)
3055
3056 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3057 #define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
3058
3059 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3060 #define JACOBI_0LS(blow,bsize) \
3061   (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3062
3063 /* Convert a bit1 to +1 or -1. */
3064 #define JACOBI_BIT1_TO_PN(result_bit1) \
3065   (1 - ((int) (result_bit1) & 2))
3066
3067 /* (2/b), with b unsigned and odd;
3068    is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3069    hence obtained from (b>>1)^b */
3070 #define JACOBI_TWO_U_BIT1(b) \
3071   ((int) (((b) >> 1) ^ (b)))
3072
3073 /* (2/b)^twos, with b unsigned and odd */
3074 #define JACOBI_TWOS_U_BIT1(twos, b) \
3075   ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3076
3077 /* (2/b)^twos, with b unsigned and odd */
3078 #define JACOBI_TWOS_U(twos, b) \
3079   (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
3080
3081 /* (-1/b), with b odd (signed or unsigned);
3082    is (-1)^((b-1)/2) */
3083 #define JACOBI_N1B_BIT1(b) \
3084   ((int) (b))
3085
3086 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3087    is (-1/b) if a<0, or +1 if a>=0 */
3088 #define JACOBI_ASGN_SU_BIT1(a, b) \
3089   ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3090
3091 /* (a/b) effect due to sign of b: signed/signed;
3092    is -1 if a and b both negative, +1 otherwise */
3093 #define JACOBI_BSGN_SS_BIT1(a, b) \
3094   ((((a)<0) & ((b)<0)) << 1)
3095
3096 /* (a/b) effect due to sign of b: signed/mpz;
3097    is -1 if a and b both negative, +1 otherwise */
3098 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3099   JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3100
3101 /* (a/b) effect due to sign of b: mpz/signed;
3102    is -1 if a and b both negative, +1 otherwise */
3103 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3104   JACOBI_BSGN_SZ_BIT1 (b, a)
3105
3106 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
3107    is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
3108    both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
3109    because this is used in a couple of places with only bit 1 of a or b
3110    valid. */
3111 #define JACOBI_RECIP_UU_BIT1(a, b) \
3112   ((int) ((a) & (b)))
3113
3114 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3115    decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
3116    updated for the new b_ptr.  result_bit1 is updated according to the
3117    factors of 2 stripped, as per (a/2).  */
3118 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)    \
3119   do {                                                                  \
3120     ASSERT ((b_size) >= 1);                                             \
3121     ASSERT ((b_low) == (b_ptr)[0]);                                     \
3122                                                                         \
3123     while (UNLIKELY ((b_low) == 0))                                     \
3124       {                                                                 \
3125         (b_size)--;                                                     \
3126         ASSERT ((b_size) >= 1);                                         \
3127         (b_ptr)++;                                                      \
3128         (b_low) = *(b_ptr);                                             \
3129                                                                         \
3130         ASSERT (((a) & 1) != 0);                                        \
3131         if ((GMP_NUMB_BITS % 2) == 1)                                   \
3132           (result_bit1) ^= JACOBI_TWO_U_BIT1(a);                        \
3133       }                                                                 \
3134   } while (0)
3135
3136 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3137    modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
3138    unsigned.  modexact_1_odd effectively calculates -a mod b, and
3139    result_bit1 is adjusted for the factor of -1.
3140
3141    The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3142    sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3143    factor to introduce into result_bit1, so for that case use mpn_mod_1
3144    unconditionally.
3145
3146    FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3147    for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
3148    or not skip a divide step, or something. */
3149
3150 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
3151   do {                                                                     \
3152     mp_srcptr  __a_ptr  = (a_ptr);                                         \
3153     mp_size_t  __a_size = (a_size);                                        \
3154     mp_limb_t  __b      = (b);                                             \
3155                                                                            \
3156     ASSERT (__a_size >= 1);                                                \
3157     ASSERT (__b & 1);                                                      \
3158                                                                            \
3159     if ((GMP_NUMB_BITS % 2) != 0                                           \
3160         || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD))           \
3161       {                                                                    \
3162         (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);                      \
3163       }                                                                    \
3164     else                                                                   \
3165       {                                                                    \
3166         (result_bit1) ^= JACOBI_N1B_BIT1 (__b);                            \
3167         (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);             \
3168       }                                                                    \
3169   } while (0)
3170
3171
3172 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3173    limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
3174    hence giving back the user's size in bits rounded up.  Notice that
3175    converting prec->bits->prec gives an unchanged value.  */
3176 #define __GMPF_BITS_TO_PREC(n)                                          \
3177   ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3178 #define __GMPF_PREC_TO_BITS(n) \
3179   ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
3180
3181 extern mp_size_t __gmp_default_fp_limb_precision;
3182
3183
3184 /* Set n to the number of significant digits an mpf of the given _mp_prec
3185    field, in the given base.  This is a rounded up value, designed to ensure
3186    there's enough digits to reproduce all the guaranteed part of the value.
3187
3188    There are prec many limbs, but the high might be only "1" so forget it
3189    and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
3190    further +1 is because the limbs usually won't fall on digit boundaries.
3191
3192    FIXME: If base is a power of 2 and the bits per digit divides
3193    BITS_PER_MP_LIMB then the +2 is unnecessary.  This happens always for
3194    base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
3195
3196 #define MPF_SIGNIFICANT_DIGITS(n, base, prec)                           \
3197   do {                                                                  \
3198     ASSERT (base >= 2 && base < numberof (mp_bases));                   \
3199     (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS)         \
3200                         * mp_bases[(base)].chars_per_bit_exactly);      \
3201   } while (0)
3202
3203
3204 /* Decimal point string, from the current C locale.  Needs <langinfo.h> for
3205    nl_langinfo and constants, preferrably with _GNU_SOURCE defined to get
3206    DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
3207    their respective #if HAVE_FOO_H.
3208
3209    GLIBC recommends nl_langinfo because getting only one facet can be
3210    faster, apparently. */
3211
3212 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
3213 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
3214 #define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
3215 #endif
3216 /* RADIXCHAR is deprecated, still in unix98 or some such. */
3217 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
3218 #define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
3219 #endif
3220 /* localeconv is slower since it returns all locale stuff */
3221 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
3222 #define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
3223 #endif
3224 #if ! defined (GMP_DECIMAL_POINT)
3225 #define GMP_DECIMAL_POINT  (".")
3226 #endif
3227
3228
3229 #define DOPRNT_CONV_FIXED        1
3230 #define DOPRNT_CONV_SCIENTIFIC   2
3231 #define DOPRNT_CONV_GENERAL      3
3232
3233 #define DOPRNT_JUSTIFY_NONE      0
3234 #define DOPRNT_JUSTIFY_LEFT      1
3235 #define DOPRNT_JUSTIFY_RIGHT     2
3236 #define DOPRNT_JUSTIFY_INTERNAL  3
3237
3238 #define DOPRNT_SHOWBASE_YES      1
3239 #define DOPRNT_SHOWBASE_NO       2
3240 #define DOPRNT_SHOWBASE_NONZERO  3
3241
3242 struct doprnt_params_t {
3243   int         base;          /* negative for upper case */
3244   int         conv;          /* choices above */
3245   const char  *expfmt;       /* exponent format */
3246   int         exptimes4;     /* exponent multiply by 4 */
3247   char        fill;          /* character */
3248   int         justify;       /* choices above */
3249   int         prec;          /* prec field, or -1 for all digits */
3250   int         showbase;      /* choices above */
3251   int         showpoint;     /* if radix point always shown */
3252   int         showtrailing;  /* if trailing zeros wanted */
3253   char        sign;          /* '+', ' ', or '\0' */
3254   int         width;         /* width field */
3255 };
3256
3257 #if _GMP_H_HAVE_VA_LIST
3258
3259 typedef int (*doprnt_format_t) _PROTO ((void *data, const char *fmt, va_list ap));
3260 typedef int (*doprnt_memory_t) _PROTO ((void *data, const char *str, size_t len));
3261 typedef int (*doprnt_reps_t) _PROTO ((void *data, int c, int reps));
3262 typedef int (*doprnt_final_t) _PROTO ((void *data));
3263
3264 struct doprnt_funs_t {
3265   doprnt_format_t  format;
3266   doprnt_memory_t  memory;
3267   doprnt_reps_t    reps;
3268   doprnt_final_t   final;   /* NULL if not required */
3269 };
3270
3271 extern const struct doprnt_funs_t  __gmp_fprintf_funs;
3272 extern const struct doprnt_funs_t  __gmp_sprintf_funs;
3273 extern const struct doprnt_funs_t  __gmp_snprintf_funs;
3274 extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
3275 extern const struct doprnt_funs_t  __gmp_ostream_funs;
3276
3277 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
3278    "size" of these have been written.  "alloc > size" is maintained, so
3279    there's room to store a '\0' at the end.  "result" is where the
3280    application wants the final block pointer.  */
3281 struct gmp_asprintf_t {
3282   char    **result;
3283   char    *buf;
3284   size_t  size;
3285   size_t  alloc;
3286 };
3287
3288 #define GMP_ASPRINTF_T_INIT(d, output)                          \
3289   do {                                                          \
3290     (d).result = (output);                                      \
3291     (d).alloc = 256;                                            \
3292     (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);      \
3293     (d).size = 0;                                               \
3294   } while (0)
3295
3296 /* If a realloc is necessary, use twice the size actually required, so as to
3297    avoid repeated small reallocs.  */
3298 #define GMP_ASPRINTF_T_NEED(d, n)                                       \
3299   do {                                                                  \
3300     size_t  alloc, newsize, newalloc;                                   \
3301     ASSERT ((d)->alloc >= (d)->size + 1);                               \
3302                                                                         \
3303     alloc = (d)->alloc;                                                 \
3304     newsize = (d)->size + (n);                                          \
3305     if (alloc <= newsize)                                               \
3306       {                                                                 \
3307         newalloc = 2*newsize;                                           \
3308         (d)->alloc = newalloc;                                          \
3309         (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,                \
3310                                                alloc, newalloc, char);  \
3311       }                                                                 \
3312   } while (0)
3313
3314 __GMP_DECLSPEC int __gmp_asprintf_memory _PROTO ((struct gmp_asprintf_t *d, const char *str, size_t len));
3315 __GMP_DECLSPEC int __gmp_asprintf_reps _PROTO ((struct gmp_asprintf_t *d, int c, int reps));
3316 __GMP_DECLSPEC int __gmp_asprintf_final _PROTO ((struct gmp_asprintf_t *d));
3317
3318 /* buf is where to write the next output, and size is how much space is left
3319    there.  If the application passed size==0 then that's what we'll have
3320    here, and nothing at all should be written.  */
3321 struct gmp_snprintf_t {
3322   char    *buf;
3323   size_t  size;
3324 };
3325
3326 /* Add the bytes printed by the call to the total retval, or bail out on an
3327    error.  */
3328 #define DOPRNT_ACCUMULATE(call) \
3329   do {                          \
3330     int  __ret;                 \
3331     __ret = call;               \
3332     if (__ret == -1)            \
3333       goto error;               \
3334     retval += __ret;            \
3335   } while (0)
3336 #define DOPRNT_ACCUMULATE_FUN(fun, params)      \
3337   do {                                          \
3338     ASSERT ((fun) != NULL);                     \
3339     DOPRNT_ACCUMULATE ((*(fun)) params);        \
3340   } while (0)
3341
3342 #define DOPRNT_FORMAT(fmt, ap)                          \
3343   DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
3344 #define DOPRNT_MEMORY(ptr, len)                                 \
3345   DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
3346 #define DOPRNT_REPS(c, n)                               \
3347   DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
3348
3349 #define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
3350
3351 #define DOPRNT_REPS_MAYBE(c, n) \
3352   do {                          \
3353     if ((n) != 0)               \
3354       DOPRNT_REPS (c, n);       \
3355   } while (0)
3356 #define DOPRNT_MEMORY_MAYBE(ptr, len)   \
3357   do {                                  \
3358     if ((len) != 0)                     \
3359       DOPRNT_MEMORY (ptr, len);         \
3360   } while (0)
3361
3362 __GMP_DECLSPEC int __gmp_doprnt _PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
3363 __GMP_DECLSPEC int __gmp_doprnt_integer _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
3364
3365 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
3366 __GMP_DECLSPEC int __gmp_doprnt_mpf _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
3367
3368 int __gmp_replacement_vsnprintf _PROTO ((char *, size_t, const char *, va_list));
3369 #endif /* _GMP_H_HAVE_VA_LIST */
3370
3371
3372 typedef int (*gmp_doscan_scan_t)  _PROTO ((void *, const char *, ...));
3373 typedef void *(*gmp_doscan_step_t) _PROTO ((void *, int));
3374 typedef int (*gmp_doscan_get_t)   _PROTO ((void *));
3375 typedef int (*gmp_doscan_unget_t) _PROTO ((int, void *));
3376
3377 struct gmp_doscan_funs_t {
3378   gmp_doscan_scan_t   scan;
3379   gmp_doscan_step_t   step;
3380   gmp_doscan_get_t    get;
3381   gmp_doscan_unget_t  unget;
3382 };
3383 extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
3384 extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
3385
3386 #if _GMP_H_HAVE_VA_LIST
3387 int __gmp_doscan _PROTO ((const struct gmp_doscan_funs_t *, void *,
3388                           const char *, va_list));
3389 #endif
3390
3391
3392 /* For testing and debugging.  */
3393 #define MPZ_CHECK_FORMAT(z)                                     \
3394   do {                                                          \
3395     ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);   \
3396     ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));                       \
3397     ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));                       \
3398   } while (0)
3399
3400 #define MPQ_CHECK_FORMAT(q)                             \
3401   do {                                                  \
3402     MPZ_CHECK_FORMAT (mpq_numref (q));                  \
3403     MPZ_CHECK_FORMAT (mpq_denref (q));                  \
3404     ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);            \
3405                                                         \
3406     if (SIZ(mpq_numref(q)) == 0)                        \
3407       {                                                 \
3408         /* should have zero as 0/1 */                   \
3409         ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1          \
3410                        && PTR(mpq_denref(q))[0] == 1);  \
3411       }                                                 \
3412     else                                                \
3413       {                                                 \
3414         /* should have no common factors */             \
3415         mpz_t  g;                                       \
3416         mpz_init (g);                                   \
3417         mpz_gcd (g, mpq_numref(q), mpq_denref(q));      \
3418         ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);         \
3419         mpz_clear (g);                                  \
3420       }                                                 \
3421   } while (0)
3422
3423 #define MPF_CHECK_FORMAT(f)                             \
3424   do {                                                  \
3425     ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
3426     ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);              \
3427     if (SIZ(f) == 0)                                    \
3428       ASSERT_ALWAYS (EXP(f) == 0);                      \
3429     if (SIZ(f) != 0)                                    \
3430       ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);        \
3431   } while (0)
3432
3433
3434 #define MPZ_PROVOKE_REALLOC(z)                                  \
3435   do { ALLOC(z) = ABSIZ(z); } while (0)
3436
3437
3438 /* Enhancement: The "mod" and "gcd_1" functions below could have
3439    __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
3440    function pointers, only actual functions.  It probably doesn't make much
3441    difference to the gmp code, since hopefully we arrange calls so there's
3442    no great need for the compiler to move things around.  */
3443
3444 #if WANT_FAT_BINARY && HAVE_HOST_CPU_FAMILY_x86
3445 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
3446    in mpn/x86/x86-defs.m4.  Be sure to update that when changing here.  */
3447 struct cpuvec_t {
3448   DECL_add_n           ((*add_n));
3449   DECL_addmul_1        ((*addmul_1));
3450   DECL_copyd           ((*copyd));
3451   DECL_copyi           ((*copyi));
3452   DECL_divexact_1      ((*divexact_1));
3453   DECL_divexact_by3c   ((*divexact_by3c));
3454   DECL_divrem_1        ((*divrem_1));
3455   DECL_gcd_1           ((*gcd_1));
3456   DECL_lshift          ((*lshift));
3457   DECL_mod_1           ((*mod_1));
3458   DECL_mod_34lsub1     ((*mod_34lsub1));
3459   DECL_modexact_1c_odd ((*modexact_1c_odd));
3460   DECL_mul_1           ((*mul_1));
3461   DECL_mul_basecase    ((*mul_basecase));
3462   DECL_preinv_divrem_1 ((*preinv_divrem_1));
3463   DECL_preinv_mod_1    ((*preinv_mod_1));
3464   DECL_rshift          ((*rshift));
3465   DECL_sqr_basecase    ((*sqr_basecase));
3466   DECL_sub_n           ((*sub_n));
3467   DECL_submul_1        ((*submul_1));
3468   int                  initialized;
3469   mp_size_t            mul_karatsuba_threshold;
3470   mp_size_t            mul_toom3_threshold;
3471   mp_size_t            sqr_karatsuba_threshold;
3472   mp_size_t            sqr_toom3_threshold;
3473 };
3474 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
3475 #endif /* x86 fat binary */
3476
3477 void __gmpn_cpuvec_init __GMP_PROTO ((void));
3478
3479 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
3480    if that hasn't yet been done (to establish the right values).  */
3481 #define CPUVEC_THRESHOLD(field)                                               \
3482   ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
3483    __gmpn_cpuvec.field)
3484
3485
3486
3487 #if TUNE_PROGRAM_BUILD
3488 /* Some extras wanted when recompiling some .c files for use by the tune
3489    program.  Not part of a normal build.
3490
3491    It's necessary to keep these thresholds as #defines (just to an
3492    identically named variable), since various defaults are established based
3493    on #ifdef in the .c files.  For some this is not so (the defaults are
3494    instead establshed above), but all are done this way for consistency. */
3495
3496 #undef  MUL_KARATSUBA_THRESHOLD
3497 #define MUL_KARATSUBA_THRESHOLD      mul_karatsuba_threshold
3498 extern mp_size_t                     mul_karatsuba_threshold;
3499
3500 #undef  MUL_TOOM3_THRESHOLD
3501 #define MUL_TOOM3_THRESHOLD          mul_toom3_threshold
3502 extern mp_size_t                     mul_toom3_threshold;
3503
3504 #undef  MUL_FFT_THRESHOLD
3505 #define MUL_FFT_THRESHOLD            mul_fft_threshold
3506 extern mp_size_t                     mul_fft_threshold;
3507
3508 #undef  MUL_FFT_MODF_THRESHOLD
3509 #define MUL_FFT_MODF_THRESHOLD       mul_fft_modf_threshold
3510 extern mp_size_t                     mul_fft_modf_threshold;
3511
3512 #undef  MUL_FFT_TABLE
3513 #define MUL_FFT_TABLE                { 0 }
3514
3515 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
3516    remain as zero (always use it). */
3517 #if ! HAVE_NATIVE_mpn_sqr_basecase
3518 #undef  SQR_BASECASE_THRESHOLD
3519 #define SQR_BASECASE_THRESHOLD       sqr_basecase_threshold
3520 extern mp_size_t                     sqr_basecase_threshold;
3521 #endif
3522
3523 #if TUNE_PROGRAM_BUILD_SQR
3524 #undef  SQR_KARATSUBA_THRESHOLD
3525 #define SQR_KARATSUBA_THRESHOLD      SQR_KARATSUBA_MAX_GENERIC
3526 #else
3527 #undef  SQR_KARATSUBA_THRESHOLD
3528 #define SQR_KARATSUBA_THRESHOLD      sqr_karatsuba_threshold
3529 extern mp_size_t                     sqr_karatsuba_threshold;
3530 #endif
3531
3532 #undef  SQR_TOOM3_THRESHOLD
3533 #define SQR_TOOM3_THRESHOLD          sqr_toom3_threshold
3534 extern mp_size_t                     sqr_toom3_threshold;
3535
3536 #undef SQR_FFT_THRESHOLD
3537 #define SQR_FFT_THRESHOLD            sqr_fft_threshold
3538 extern mp_size_t                     sqr_fft_threshold;
3539
3540 #undef SQR_FFT_MODF_THRESHOLD
3541 #define SQR_FFT_MODF_THRESHOLD       sqr_fft_modf_threshold
3542 extern mp_size_t                     sqr_fft_modf_threshold;
3543
3544 #undef  SQR_FFT_TABLE
3545 #define SQR_FFT_TABLE                { 0 }
3546
3547 #undef  MULLOW_BASECASE_THRESHOLD
3548 #define MULLOW_BASECASE_THRESHOLD    mullow_basecase_threshold
3549 extern mp_size_t                     mullow_basecase_threshold;
3550
3551 #undef  MULLOW_DC_THRESHOLD
3552 #define MULLOW_DC_THRESHOLD          mullow_dc_threshold
3553 extern mp_size_t                     mullow_dc_threshold;
3554
3555 #undef  MULLOW_MUL_N_THRESHOLD
3556 #define MULLOW_MUL_N_THRESHOLD       mullow_mul_n_threshold
3557 extern mp_size_t                     mullow_mul_n_threshold;
3558
3559
3560 #if ! UDIV_PREINV_ALWAYS
3561 #undef  DIV_SB_PREINV_THRESHOLD
3562 #define DIV_SB_PREINV_THRESHOLD      div_sb_preinv_threshold
3563 extern mp_size_t                     div_sb_preinv_threshold;
3564 #endif
3565
3566 #undef  DIV_DC_THRESHOLD
3567 #define DIV_DC_THRESHOLD             div_dc_threshold
3568 extern mp_size_t                     div_dc_threshold;
3569
3570 #undef  POWM_THRESHOLD
3571 #define POWM_THRESHOLD               powm_threshold
3572 extern mp_size_t                     powm_threshold;
3573
3574 #undef  GCD_ACCEL_THRESHOLD
3575 #define GCD_ACCEL_THRESHOLD          gcd_accel_threshold
3576 extern mp_size_t                     gcd_accel_threshold;
3577
3578 #undef  GCDEXT_THRESHOLD
3579 #define GCDEXT_THRESHOLD             gcdext_threshold
3580 extern mp_size_t                     gcdext_threshold;
3581
3582 #undef DIVREM_1_NORM_THRESHOLD
3583 #define DIVREM_1_NORM_THRESHOLD      divrem_1_norm_threshold
3584 extern mp_size_t                     divrem_1_norm_threshold;
3585
3586 #undef DIVREM_1_UNNORM_THRESHOLD
3587 #define DIVREM_1_UNNORM_THRESHOLD    divrem_1_unnorm_threshold
3588 extern mp_size_t                     divrem_1_unnorm_threshold;
3589
3590 #undef MOD_1_NORM_THRESHOLD
3591 #define MOD_1_NORM_THRESHOLD         mod_1_norm_threshold
3592 extern mp_size_t                     mod_1_norm_threshold;
3593
3594 #undef MOD_1_UNNORM_THRESHOLD
3595 #define MOD_1_UNNORM_THRESHOLD       mod_1_unnorm_threshold
3596 extern mp_size_t                     mod_1_unnorm_threshold;
3597
3598 #if ! UDIV_PREINV_ALWAYS
3599 #undef  DIVREM_2_THRESHOLD
3600 #define DIVREM_2_THRESHOLD           divrem_2_threshold
3601 extern mp_size_t                     divrem_2_threshold;
3602 #endif
3603
3604 #undef  GET_STR_DC_THRESHOLD
3605 #define GET_STR_DC_THRESHOLD         get_str_dc_threshold
3606 extern mp_size_t                     get_str_dc_threshold;
3607
3608 #undef GET_STR_PRECOMPUTE_THRESHOLD
3609 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
3610 extern mp_size_t                     get_str_precompute_threshold;
3611
3612 #undef  SET_STR_THRESHOLD
3613 #define SET_STR_THRESHOLD            set_str_threshold
3614 extern mp_size_t                     SET_STR_THRESHOLD;
3615
3616 #undef  FFT_TABLE_ATTRS
3617 #define FFT_TABLE_ATTRS
3618 extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
3619
3620 /* Sizes the tune program tests up to, used in a couple of recompilations. */
3621 #undef MUL_KARATSUBA_THRESHOLD_LIMIT
3622 #undef MUL_TOOM3_THRESHOLD_LIMIT
3623 #undef MULLOW_BASECASE_THRESHOLD_LIMIT
3624 #undef SQR_TOOM3_THRESHOLD_LIMIT
3625 #define SQR_KARATSUBA_MAX_GENERIC       200
3626 #define MUL_KARATSUBA_THRESHOLD_LIMIT   700
3627 #define MUL_TOOM3_THRESHOLD_LIMIT       700
3628 #define MULLOW_BASECASE_THRESHOLD_LIMIT 200
3629 #define SQR_TOOM3_THRESHOLD_LIMIT       400
3630 #define GET_STR_THRESHOLD_LIMIT         150
3631
3632 /* "thresh" will normally be a variable when tuning, so use the cached
3633    result.  This helps mpn_sb_divrem_mn for instance.  */
3634 #undef  CACHED_ABOVE_THRESHOLD
3635 #define CACHED_ABOVE_THRESHOLD(cache, thresh)  (cache)
3636 #undef  CACHED_BELOW_THRESHOLD
3637 #define CACHED_BELOW_THRESHOLD(cache, thresh)  (cache)
3638
3639 #endif /* TUNE_PROGRAM_BUILD */
3640
3641 #if defined (__cplusplus)
3642 }
3643 #endif
3644
3645
3646 #ifdef __cplusplus
3647
3648 /* A little helper for a null-terminated __gmp_allocate_func string.
3649    The destructor ensures it's freed even if an exception is thrown.
3650    The len field is needed by the destructor, and can be used by anyone else
3651    to avoid a second strlen pass over the data.
3652
3653    Since our input is a C string, using strlen is correct.  Perhaps it'd be
3654    more C++-ish style to use std::char_traits<char>::length, but char_traits
3655    isn't available in gcc 2.95.4.  */
3656
3657 class gmp_allocated_string {
3658  public:
3659   char *str;
3660   size_t len;
3661   gmp_allocated_string(char *arg)
3662   {
3663     str = arg;
3664     len = std::strlen (str);
3665   }
3666   ~gmp_allocated_string()
3667   {
3668     (*__gmp_free_func) (str, len+1);
3669   }
3670 };
3671
3672 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
3673 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
3674 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
3675 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
3676 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
3677 extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
3678
3679 #endif /* __cplusplus */
3680
3681 #endif /* __GMP_IMPL_H__ */