/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it. */
#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined (_CRAY)
#include <intrinsics.h>  /* for _popcnt */
#endif
/* limits.h is not used in general, since it's an ANSI-ism, and since on
   solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values).

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort. */
#if defined (_CRAY)
#include <limits.h>
#endif
/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmp_cpuvec,
   they're not called directly. */
#define DECL_add_n(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
#define DECL_addmul_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_copyd(name) \
  void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_copyi(name) \
  void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_divexact_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divexact_by3c(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divrem_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_gcd_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_lshift(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
#define DECL_mod_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_mod_34lsub1(name) \
  mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
#define DECL_modexact_1c_odd(name) \
  mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_mul_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_mul_basecase(name) \
  void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
#define DECL_preinv_divrem_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
#define DECL_preinv_mod_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_rshift(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
#define DECL_sqr_basecase(name) \
  void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_sub_n(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
#define DECL_submul_1(name) \
  mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
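
/* Illustrative example (not from the original source): a fat binary would
   declare a CPU-specific entrypoint with one of the above, eg.

       DECL_add_n (__MPN(add_n_k8));

   ("add_n_k8" is a hypothetical name) giving it the standard mpn_add_n
   prototype so its address can be stored into __gmp_cpuvec. */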
#if ! __GMP_WITHIN_CONFIGURE
#include "gmp-mparam.h"
#include "fib_table.h"
#include "mp_bases.h"
#endif

#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif

#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* Might search and replace _PROTO to __GMP_PROTO internally one day, to
   avoid two names for one thing, but no hurry for that. */
#define _PROTO(x) __GMP_PROTO(x)
/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc. */

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif
/* if not provided by gmp-mparam.h */
#ifndef BYTES_PER_MP_LIMB
#define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
#endif
#ifndef BITS_PER_MP_LIMB
#define BITS_PER_MP_LIMB  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)

/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif

/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h. */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif
215 /* "const" basically means a function does nothing but examine its arguments
216 and give a return value, it doesn't read or write any memory (neither
217 global nor pointed to by arguments), and has no other side-effects. This
218 is more restrictive than "pure". See info node "(gcc)Function
219 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
220 this off when trying to write timing loops. */
221 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
222 #define ATTRIBUTE_CONST __attribute__ ((const))
224 #define ATTRIBUTE_CONST
227 #if HAVE_ATTRIBUTE_NORETURN
228 #define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
230 #define ATTRIBUTE_NORETURN
233 /* "malloc" means a function behaves like malloc in that the pointer it
234 returns doesn't alias anything. */
235 #if HAVE_ATTRIBUTE_MALLOC
236 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
238 #define ATTRIBUTE_MALLOC
243 #define strchr(s,c) index(s,c)
247 #define memset(p, c, n) \
250 char *__memset__p = (p); \
252 for (__i = 0; __i < (n); __i++) \
253 __memset__p[__i] = (c); \
/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array. */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif

/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz). */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif
#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          ...
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs. */
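
/* Illustrative sketch (not part of the original header) of the discipline
   described above, in a hypothetical function needing limb scratch space:

       static void
       example_fn (mp_srcptr ap, mp_size_t n)
       {
         mp_ptr  tp;
         TMP_DECL;
         TMP_MARK;
         tp = TMP_ALLOC_LIMBS (2*n);    scratch region, released below
         ...
         TMP_FREE;
       }

   TMP_ALLOC_LIMBS is defined further down in this file. */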
/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union. */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)
/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
#define ROUND_UP_MULTIPLE(a,m)          \
  (POW2_P(m) ? (a) + (-(a))%(m)         \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
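
/* For instance (illustrative): with m=8 a power of 2 and a=10, unsigned
   (-10)%8 is 6 (since -10 wraps to a value congruent to 6 mod 8), giving
   10+6 = 16.  With m=12 not a power of 2 and a=10, the second branch gives
   10+12-1 - ((10+12-1) % 12) = 21 - 9 = 12. */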
#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
void *__gmp_tmp_reentrant_alloc _PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
void  __gmp_tmp_reentrant_free _PROTO ((struct tmp_reentrant_t *));
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL           struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK           __tmp_marker = 0
#define TMP_SALLOC(n)      alloca(n)
#define TMP_BALLOC(n)      __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_ALLOC(n)                                            \
  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                                               \
  do {                                                                         \
    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL          TMP_DECL
#define TMP_DECL           struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK          TMP_MARK
#define TMP_MARK           __tmp_marker = 0
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(n)       __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE          TMP_FREE
#define TMP_FREE           __gmp_tmp_reentrant_free (__tmp_marker)
#endif
#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
void *__gmp_tmp_alloc _PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
void __gmp_tmp_mark _PROTO ((struct tmp_marker *));
void __gmp_tmp_free _PROTO ((struct tmp_marker *));
#define TMP_SDECL          TMP_DECL
#define TMP_DECL           struct tmp_marker __tmp_marker
#define TMP_SMARK          TMP_MARK
#define TMP_MARK           __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(n)                                                    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE          TMP_FREE
#define TMP_FREE           __gmp_tmp_free (&__tmp_marker)
#endif
#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  char                      *block;
  size_t                    size;
};
void  __gmp_tmp_debug_mark  _PROTO ((const char *, int, struct tmp_debug_t **,
                                     struct tmp_debug_t *,
                                     const char *, const char *));
void *__gmp_tmp_debug_alloc _PROTO ((const char *, int, int,
                                     struct tmp_debug_t **, const char *,
                                     size_t)) ATTRIBUTE_MALLOC;
void  __gmp_tmp_debug_free  _PROTO ((const char *, int, int,
                                     struct tmp_debug_t **,
                                     const char *, const char *));
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too. */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(size)                                         \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,              \
                         __tmp_marker_inscope,                  \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    ASSERT_ALWAYS (marker);                                     \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif /* WANT_TMP_DEBUG */
/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)
/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually. */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)
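
/* Illustrative use (not from the original source): grabbing two scratch
   regions in one shot, with hypothetical sizes un and vn:

       mp_ptr  up, vp;
       TMP_ALLOC_LIMBS_2 (up,un, vp,vn);

   In a normal build this costs a single allocation of un+vn limbs. */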
/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x)   ((x) >= 0 ? (x) : -(x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros. */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define LIMBS(x)  ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)
/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that. */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
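
/* For instance POW2_P(16) is 1 and POW2_P(12) is 0; note POW2_P(0) is also
   1, which is the n==0 case warned about above. */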
/* The "short" defines are a bit different because shorts are promoted to
   ints in arithmetic expressions.

   #ifndef's are used since on some systems (HP?) header files other than
   limits.h setup these defines.  We could forcibly #undef in that case, but
   there seems no need to worry about that. */

#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed. */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif
#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN

#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif
#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified. */

#undef  DIV_SB_PREINV_THRESHOLD
#undef  DIV_DC_THRESHOLD
#undef  POWM_THRESHOLD
#define DIV_SB_PREINV_THRESHOLD      MP_SIZE_T_MAX
#define DIV_DC_THRESHOLD                 50
#define POWM_THRESHOLD                    0

#undef  GCD_ACCEL_THRESHOLD
#define GCD_ACCEL_THRESHOLD               3

#undef  DIVREM_1_NORM_THRESHOLD
#undef  DIVREM_1_UNNORM_THRESHOLD
#undef  MOD_1_NORM_THRESHOLD
#undef  MOD_1_UNNORM_THRESHOLD
#undef  USE_PREINV_DIVREM_1
#undef  USE_PREINV_MOD_1
#undef  DIVREM_2_THRESHOLD
#undef  DIVEXACT_1_THRESHOLD
#undef  MODEXACT_1_ODD_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD      MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD    MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD       MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1               0  /* no preinv */
#define USE_PREINV_MOD_1                  0  /* no preinv */
#define DIVREM_2_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */

#undef  GET_STR_DC_THRESHOLD
#undef  GET_STR_PRECOMPUTE_THRESHOLD
#undef  SET_STR_THRESHOLD
#define GET_STR_DC_THRESHOLD             22
#define GET_STR_PRECOMPUTE_THRESHOLD     42
#define SET_STR_THRESHOLD              3259

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD            MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD            MP_SIZE_T_MAX
#endif
#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)                       \
  do {                                          \
    mp_ptr __mp_ptr_swap__tmp = (x);            \
    (x) = (y);                                  \
    (y) = __mp_ptr_swap__tmp;                   \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)              \
  do {                                          \
    MP_PTR_SWAP (xp, yp);                       \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)           \
  do {                                          \
    MP_SRCPTR_SWAP (xp, yp);                    \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)

#define MPZ_PTR_SWAP(x, y)                      \
  do {                                          \
    mpz_ptr __mpz_ptr_swap__tmp = (x);          \
    (x) = (y);                                  \
    (y) = __mpz_ptr_swap__tmp;                  \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)
/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that. */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));

void *__gmp_default_allocate _PROTO ((size_t));
void *__gmp_default_reallocate _PROTO ((void *, size_t, size_t));
void  __gmp_default_free _PROTO ((void *, size_t));

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)


/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif
/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway). */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif
/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block. */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif
void __gmpz_aorsmul_1 _PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr u, mp_limb_t v, mp_size_t sub))) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
void mpz_n_pow_ui _PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));

#define mpn_add_nc __MPN(add_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
   returns the carry out (0, 1 or 2). */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
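
/* An illustrative generic equivalent (a sketch, not how the native routine
   is implemented): with a scratch area tp of n limbs,

       cy = mpn_lshift (tp, bp, n, 1);
       cy += mpn_add_n (cp, ap, tp, n);

   leaves {cp,n} = {ap,n} + 2*{bp,n} with cy in the range 0 to 2, matching
   the return value described above. */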
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
   returns the borrow out (0, 1 or 2). */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1). */
#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
   complement negative. */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_addsub_n __MPN(addsub_n)
__GMP_DECLSPEC mp_limb_t mpn_addsub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_addsub_nc __MPN(addsub_nc)
__GMP_DECLSPEC mp_limb_t mpn_addsub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));

#define mpn_fib2_ui __MPN(fib2_ui)
mp_size_t mpn_fib2_ui _PROTO ((mp_ptr, mp_ptr, unsigned long));

/* Remap names of internal mpn functions. */
#define __clz_tab        __MPN(clz_tab)
#define mpn_udiv_w_sdiv  __MPN(udiv_w_sdiv)

#define mpn_gcd_finda __MPN(gcd_finda)
mp_limb_t mpn_gcd_finda _PROTO((const mp_limb_t cp[2])) __GMP_ATTRIBUTE_PURE;

#define mpn_jacobi_base __MPN(jacobi_base)
int mpn_jacobi_base _PROTO ((mp_limb_t a, mp_limb_t b, int result_bit1)) ATTRIBUTE_CONST;

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_mul_2 __MPN(mul_2)
mp_limb_t mpn_mul_2 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#endif

#define mpn_mullow_n __MPN(mullow_n)
__GMP_DECLSPEC void mpn_mullow_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_mullow_basecase __MPN(mullow_basecase)
__GMP_DECLSPEC void mpn_mullow_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_sqr_n __MPN(sqr_n)
__GMP_DECLSPEC void mpn_sqr_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#define mpn_sub_nc __MPN(sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_invert_2exp __MPN(invert_2exp)
__GMP_DECLSPEC void mpn_invert_2exp __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure. */
typedef struct {
  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t rstate, mpz_srcptr seed));
  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits));
  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t rstate));
  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
} gmp_randfnptr_t;
/* Macro to obtain a void pointer to the function pointers structure. */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr. */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
/* Write a given number of random bits to rp. */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr  __rstate = (state);                      \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
      (__rstate, rp, bits);                                     \
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important. */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                        \
  ((__gmp_rands_initialized ? 0                      \
    : (__gmp_rands_initialized = 1,                  \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)),  \
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)
/* kara uses n+1 limbs of temporary space and then recurses with the balance,
   so need (n+1) + (ceil(n/2)+1) + (ceil(n/4)+1) + ...  This can be solved to
   2n + o(n).  Since n is very limited, o(n) in practice could be around 15.
   For now, assume n is arbitrarily large. */
#define MPN_KARA_MUL_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
#define MPN_KARA_SQR_N_TSIZE(n)   (2*(n) + 2*GMP_LIMB_BITS)
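
/* Sketch of the sum behind the 2n+o(n) claim above (illustrative):
   (n+1) + (n/2+1) + (n/4+1) + ... is a geometric series bounded by 2n plus
   one "+1" per recursion level, ie. about log2(n) extra limbs, so the
   2*GMP_LIMB_BITS slack comfortably covers the o(n) part for any feasible
   operand size. */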
/* toom3 uses 2n + 2n/3 + o(n) limbs of temporary space if mpn_sublsh1_n is
   unavailable, but just 2n + o(n) if mpn_sublsh1_n is available.  It is hard
   to pin down the value of o(n), since it is a complex function of
   MUL_TOOM3_THRESHOLD and n.  Normally toom3 is used between kara and fft; in
   that case o(n) will be really limited.  If toom3 is used for arbitrarily
   large operands, o(n) will be larger.  These definitions handle operands of
   up to 8956264246117233 limbs.  A single multiplication using toom3 on the
   fastest hardware currently (2003) would need 100 million years, which
   suggests that these limits are acceptable. */
#if WANT_FFT
#if HAVE_NATIVE_mpn_sublsh1_n
#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 63)
#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 63)
#else
#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 63)
#endif
#else /* ! WANT_FFT */
#if HAVE_NATIVE_mpn_sublsh1_n
#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 255)
#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 255)
#else
#define MPN_TOOM3_MUL_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
#define MPN_TOOM3_SQR_N_TSIZE(n)  (2*(n) + 2*(n/3) + 255)
#endif
#define MPN_TOOM3_MAX_N  285405
#endif /* WANT_FFT */

/* need 2 so that n2>=1 */
#define MPN_KARA_MUL_N_MINSIZE    2
#define MPN_KARA_SQR_N_MINSIZE    2

/* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */
#define MPN_TOOM3_MUL_N_MINSIZE   17
#define MPN_TOOM3_SQR_N_MINSIZE   17
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_kara_mul_n __MPN(kara_mul_n)
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_kara_sqr_n __MPN(kara_sqr_n)
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_mul_n __MPN(toom3_mul_n)
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_fft_best_k __MPN(fft_best_k)
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
int mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
                         mp_srcptr n, mp_size_t nl,
                         mp_srcptr m, mp_size_t ml,
                         int k));

#define mpn_mul_fft_full __MPN(mul_fft_full)
void mpn_mul_fft_full _PROTO ((mp_ptr op,
                               mp_srcptr n, mp_size_t nl,
                               mp_srcptr m, mp_size_t ml));

#define mpn_fft_next_size __MPN(fft_next_size)
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k)) ATTRIBUTE_CONST;

#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t,
                                    mp_srcptr, mp_size_t));

#define mpn_dc_divrem_n __MPN(dc_divrem_n)
mp_limb_t mpn_dc_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
/* #define mpn_tdiv_q  __MPN(tdiv_q) */
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */

#define mpz_divexact_gcd  __gmpz_divexact_gcd
void mpz_divexact_gcd _PROTO ((mpz_ptr q, mpz_srcptr a, mpz_srcptr d));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
size_t mpz_inp_str_nowhite _PROTO ((mpz_ptr x, FILE *stream, int base, int c, size_t nread));
#endif

#define mpn_divisible_p __MPN(divisible_p)
int mpn_divisible_p _PROTO ((mp_srcptr ap, mp_size_t asize,
                             mp_srcptr dp, mp_size_t dsize)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __MPN(rootrem)
mp_size_t mpn_rootrem _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                                      \
  do {                                                                  \
    int __i;            /* Faster on some Crays with plain int */       \
    _Pragma ("_CRI ivdep");                                             \
    for (__i = 0; __i < (n); __i++)                                     \
      (dst)[__i] = (src)[__i];                                          \
  } while (0)
#endif

/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
#define mpn_copyi __MPN(copyi)
__GMP_DECLSPEC void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    mpn_copyi (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst);                           \
        mp_srcptr __src = (src);                        \
        mp_limb_t __x;                                  \
        __x = *__src++;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst++ = __x;                         \
                __x = *__src++;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst++ = __x;                                 \
      }                                                 \
  } while (0)
#endif

#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                                      \
  do {                                                                  \
    int __i;            /* Faster on some Crays with plain int */       \
    _Pragma ("_CRI ivdep");                                             \
    for (__i = (n) - 1; __i >= 0; __i--)                                \
      (dst)[__i] = (src)[__i];                                          \
  } while (0)
#endif

/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
#define mpn_copyd __MPN(copyd)
__GMP_DECLSPEC void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif

#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    mpn_copyd (dst, src, size);                         \
  } while (0)
#endif

/* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
#if ! defined (MPN_COPY_DECR)
#define MPN_COPY_DECR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst) + __n;                     \
        mp_srcptr __src = (src) + __n;                  \
        mp_limb_t __x;                                  \
        __x = *__src--;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst-- = __x;                         \
                __x = *__src--;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst-- = __x;                                 \
      }                                                 \
  } while (0)
#endif

#ifndef MPN_COPY
#define MPN_COPY(d,s,n)                         \
  do {                                          \
    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));  \
    MPN_COPY_INCR (d, s, n);                    \
  } while (0)
#endif
/* Set {dst,size} to the limbs of {src,size} in reverse order. */
#define MPN_REVERSE(dst, src, size)                     \
  do {                                                  \
    mp_ptr     __dst = (dst);                           \
    mp_size_t  __size = (size);                         \
    mp_srcptr  __src = (src) + __size - 1;              \
    mp_size_t  __i;                                     \
    ASSERT ((size) >= 0);                               \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));    \
    CRAY_Pragma ("_CRI ivdep");                         \
    for (__i = 0; __i < __size; __i++)                  \
      {                                                 \
        *__dst = *__src;                                \
        __dst++;                                        \
        __src--;                                        \
      }                                                 \
  } while (0)
/* Zero n limbs at dst.

   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
   ppc630 for instance this is optimal since it can sustain only 1 store per
   cycle.

   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
   "for" loop in the generic code below can become stu/bdnz.  The do/while
   here helps it get to that.  The same caveat about plain -mpowerpc64 mode
   applies here as to __GMPN_COPY_INCR in gmp.h.

   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
   this loop too.

   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
   at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
   trouble than it's worth to do the same, though perhaps a call to memset
   would be good when on a GNU system. */

#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst) - 1;               \
        mp_size_t __n = (n);                    \
        do                                      \
          *++__dst = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif

#ifndef MPN_ZERO
#define MPN_ZERO(dst, n)                        \
  do {                                          \
    ASSERT ((n) >= 0);                          \
    if ((n) != 0)                               \
      {                                         \
        mp_ptr __dst = (dst);                   \
        mp_size_t __n = (n);                    \
        do                                      \
          *__dst++ = 0;                         \
        while (--__n);                          \
      }                                         \
  } while (0)
#endif
/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
   start up and would need to strip a lot of zeros before it'd be faster
   than a simple cmpl loop.  Here are some times in cycles for
   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
   high zeros). */

#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS)              \
  do {                                          \
    while ((NLIMBS) > 0)                        \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        (NLIMBS)--;                             \
      }                                         \
  } while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)     \
  do {                                          \
    ASSERT ((NLIMBS) >= 1);                     \
    while (1)                                   \
      {                                         \
        if ((DST)[(NLIMBS) - 1] != 0)           \
          break;                                \
        (NLIMBS)--;                             \
      }                                         \
  } while (0)
#endif
/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
   somewhere a non-zero limb. */
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)    \
  do {                                                  \
    ASSERT ((size) >= 1);                               \
    ASSERT ((low) == (ptr)[0]);                         \
                                                        \
    while ((low) == 0)                                  \
      {                                                 \
        (size)--;                                       \
        ASSERT ((size) >= 1);                           \
        (ptr)++;                                        \
        (low) = *(ptr);                                 \
      }                                                 \
  } while (0)
/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
   temporary variable; it will be automatically cleared out at function
   return.  We use __x here to make it possible to accept both mpz_ptr and
   mpz_t arguments. */
#define MPZ_TMP_INIT(X, NLIMBS)                                         \
  do {                                                                  \
    mpz_ptr __x = (X);                                                  \
    ASSERT ((NLIMBS) >= 1);                                             \
    __x->_mp_alloc = (NLIMBS);                                          \
    __x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB);     \
  } while (0)

/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))     \
                          ? (mp_ptr) _mpz_realloc(z,n)  \
                          : PTR(z))

#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
   f1p.

   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.

   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
   without good floating point.  There's +2 for rounding up, and a further
   +2 since at the last step x limbs are doubled into a 2x+1 limb region
   whereas the actual F[2k] value might be only 2x-1 limbs.

   Note that a division is done first, since on a 32-bit system it's at
   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
   whatever a multiply of two 190Mbyte numbers takes.)

   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
   worked into the multiplier. */

#define MPN_FIB2_SIZE(n) \
  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
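
/* Worked instance (illustrative): with GMP_NUMB_BITS==32 and n==1000,
   F[1000] needs about 1000*0.6942419 = 695 bits = 22 limbs, while the macro
   computes 1000/32 = 31, 31*23 = 713, 713/32 = 22, plus 4 giving 26 limbs,
   a safe overestimate. */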
/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
   -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).

   FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
   F[n] + 2*F[n-1] fits in a limb. */

__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm. */

#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
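
/* Typical use (illustrative sketch, not from the original source):

       if (BELOW_THRESHOLD (n, MUL_KARATSUBA_THRESHOLD))
         mpn_mul_basecase (wp, xp, n, yp, n);
       else
         ...

   When the threshold is a compile-time constant the test folds away. */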
/* Usage: int  use_foo = BELOW_THRESHOLD (size, FOO_THRESHOLD);
          ...
          if (CACHED_BELOW_THRESHOLD (use_foo, FOO_THRESHOLD))

   When "use_foo" is a constant (thresh is 0 or MP_SIZE_T_MAX), gcc prior to
   version 3.3 doesn't optimize away a test "if (use_foo)" when within a
   loop.  CACHED_BELOW_THRESHOLD helps it do so. */

#define CACHED_ABOVE_THRESHOLD(cache, thresh)           \
  ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
   ? ABOVE_THRESHOLD (0, thresh)                        \
   : (cache))
#define CACHED_BELOW_THRESHOLD(cache, thresh)           \
  ((thresh) == 0 || (thresh) == MP_SIZE_T_MAX           \
   ? BELOW_THRESHOLD (0, thresh)                        \
   : (cache))
/* If MUL_KARATSUBA_THRESHOLD is not already defined, define it to a
   value which is good on most machines. */
#ifndef MUL_KARATSUBA_THRESHOLD
#define MUL_KARATSUBA_THRESHOLD 32
#endif

/* If MUL_TOOM3_THRESHOLD is not already defined, define it to a
   value which is good on most machines. */
#ifndef MUL_TOOM3_THRESHOLD
#define MUL_TOOM3_THRESHOLD 128
#endif

/* MUL_KARATSUBA_THRESHOLD_LIMIT is the maximum for MUL_KARATSUBA_THRESHOLD.
   In a normal build MUL_KARATSUBA_THRESHOLD is a constant and we use that.
   In a fat binary or tune program build MUL_KARATSUBA_THRESHOLD is a
   variable and a separate hard limit will have been defined.  Similarly for
   MUL_TOOM3_THRESHOLD. */
#ifndef MUL_KARATSUBA_THRESHOLD_LIMIT
#define MUL_KARATSUBA_THRESHOLD_LIMIT  MUL_KARATSUBA_THRESHOLD
#endif
#ifndef MUL_TOOM3_THRESHOLD_LIMIT
#define MUL_TOOM3_THRESHOLD_LIMIT  MUL_TOOM3_THRESHOLD
#endif
#ifndef MULLOW_BASECASE_THRESHOLD_LIMIT
#define MULLOW_BASECASE_THRESHOLD_LIMIT  MULLOW_BASECASE_THRESHOLD
#endif
/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
   mpn_mul_basecase in mpn_sqr_n.  Default is to use mpn_sqr_basecase
   always.  (Note that we certainly always want it if there's a native
   assembler mpn_sqr_basecase.)

   If it turns out that mpn_kara_sqr_n becomes faster than mpn_mul_basecase
   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the
   karatsuba threshold and SQR_KARATSUBA_THRESHOLD is 0.  This oddity arises
   more or less because SQR_KARATSUBA_THRESHOLD represents the size up to
   which mpn_sqr_basecase should be used, and that may be never. */

#ifndef SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD 0
#endif

#ifndef SQR_KARATSUBA_THRESHOLD
#define SQR_KARATSUBA_THRESHOLD (2*MUL_KARATSUBA_THRESHOLD)
#endif

#ifndef SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD 128
#endif

/* See comments above about MUL_TOOM3_THRESHOLD_LIMIT. */
#ifndef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
#endif
/* First k to use for an FFT modF multiply.  A modF FFT is an order
   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
   whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
#define FFT_FIRST_K  4
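
/* As an illustrative check of those exponents: log(2^k)/log(2^(k-1))
   simplifies to k/(k-1), so k=3 gives 3/2 = 1.5 and k=4 gives 4/3 = 1.33,
   matching the figures quoted above. */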
/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
#ifndef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM3_THRESHOLD * 3)
#endif
#ifndef SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
#endif

/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
   NxN->2N multiply and not recursing into itself is an order
   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
   is the first better than toom3. */
#ifndef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
#endif
#ifndef SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
#endif
/* Table of thresholds for successive modF FFT "k"s.  The first entry is
   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
   etc.  See mpn_fft_best_k(). */
#ifndef MUL_FFT_TABLE
#define MUL_FFT_TABLE                           \
  { MUL_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    MUL_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    MUL_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    MUL_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    MUL_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    MUL_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif
#ifndef SQR_FFT_TABLE
#define SQR_FFT_TABLE                           \
  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */        \
    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */        \
    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */        \
    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */        \
    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */        \
    SQR_TOOM3_THRESHOLD * 288, /* k=10 */       \
    0 }
#endif

#ifndef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS   static const
#endif

#define MPN_FFT_TABLE_SIZE  16
/* mpn_dc_divrem_n(n) calls 2*mul(n/2)+2*div(n/2), thus to be faster than
   div(n) = 4*div(n/2), we need mul(n/2) to be faster than the classic way,
   i.e. n/2 >= MUL_KARATSUBA_THRESHOLD

   Measured values are between 2 and 4 times MUL_KARATSUBA_THRESHOLD, so go
   for 3 as an average. */

#ifndef DIV_DC_THRESHOLD
#define DIV_DC_THRESHOLD    (3 * MUL_KARATSUBA_THRESHOLD)
#endif
/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize)     \
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
#define MEM_OVERLAP_P(xp, xsize, yp, ysize)     \
  (   (char *) (xp) + (xsize) > (char *) (yp)   \
   && (char *) (yp) + (ysize) > (char *) (xp))
/* Return non-zero if xp,xsize and yp,ysize are either identical or not
   overlapping.  Return zero if they're partially overlapping. */
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)            \
  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)   \
  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))

/* Return non-zero if dst,dsize and src,ssize are either identical or
   overlapping in a way suitable for an incrementing/decrementing algorithm.
   Return zero if they're partially overlapping in an unsuitable fashion. */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)     \
  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size)              \
  MPN_SAME_OR_INCR2_P(dst, size, src, size)
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)     \
  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size)              \
  MPN_SAME_OR_DECR2_P(dst, size, src, size)
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
   does it always.  Generally assertions are meant for development, but
   might help when looking for a problem later too.

   Note that strings shouldn't be used within the ASSERT expression,
   eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
   used in the !HAVE_STRINGIZE case (ie. K&R). */

#ifdef __LINE__
#define ASSERT_LINE  __LINE__
#else
#define ASSERT_LINE  -1
#endif

#ifdef __FILE__
#define ASSERT_FILE  __FILE__
#else
#define ASSERT_FILE  ""
#endif

void __gmp_assert_header _PROTO ((const char *filename, int linenum));
__GMP_DECLSPEC void __gmp_assert_fail _PROTO ((const char *filename, int linenum, const char *expr)) ATTRIBUTE_NORETURN;

#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#else
#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif

#define ASSERT_ALWAYS(expr)     \
  do {                          \
    if (!(expr))                \
      ASSERT_FAIL (expr);       \
  } while (0)

#if WANT_ASSERT
#define ASSERT(expr)   ASSERT_ALWAYS (expr)
#else
#define ASSERT(expr)   do {} while (0)
#endif
/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
   that it's zero.  In both cases if assertion checking is disabled the
   expression is still evaluated.  These macros are meant for use with
   routines like mpn_add_n() where the return value represents a carry or
   whatever that should or shouldn't occur in some context.  For example,
   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
#if WANT_ASSERT
#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
#else
#define ASSERT_CARRY(expr)     (expr)
#define ASSERT_NOCARRY(expr)   (expr)
#endif

/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
   same as writing "#if WANT_ASSERT", but more compact. */
#if WANT_ASSERT
#define ASSERT_CODE(expr)  expr
#else
#define ASSERT_CODE(expr)
#endif
/* Test that an mpq_t is in fully canonical form.  This can be used as
   protection on routines like mpq_equal which give wrong results on
   non-canonical inputs. */
#if WANT_ASSERT
#define ASSERT_MPQ_CANONICAL(q)                         \
  do {                                                  \
    ASSERT (q->_mp_den._mp_size > 0);                   \
    if (q->_mp_num._mp_size == 0)                       \
      {                                                 \
        /* zero should be 0/1 */                        \
        ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);   \
      }                                                 \
    else                                                \
      {                                                 \
        /* no common factors */                         \
        mpz_t  __g;                                     \
        mpz_init (__g);                                 \
        mpz_gcd (__g, mpq_numref(q), mpq_denref(q));    \
        ASSERT (mpz_cmp_ui (__g, 1) == 0);              \
        mpz_clear (__g);                                \
      }                                                 \
  } while (0)
#else
#define ASSERT_MPQ_CANONICAL(q)  do {} while (0)
#endif
/* Check that the nail parts are zero. */
#define ASSERT_ALWAYS_LIMB(limb)                \
  do {                                          \
    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK; \
    ASSERT_ALWAYS (__nail == 0);                \
  } while (0)
#define ASSERT_ALWAYS_MPN(ptr, size)            \
  do {                                          \
    /* let whole loop go dead when no nails */  \
    if (GMP_NAIL_BITS != 0)                     \
      {                                         \
        mp_size_t  __i;                         \
        for (__i = 0; __i < (size); __i++)      \
          ASSERT_ALWAYS_LIMB ((ptr)[__i]);      \
      }                                         \
  } while (0)
#if WANT_ASSERT
#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
#else
#define ASSERT_LIMB(limb)       do {} while (0)
#define ASSERT_MPN(ptr, size)   do {} while (0)
#endif
/* Assert that an mpn region {ptr,size} is zero, or non-zero.
   size==0 is allowed, and in that case {ptr,size} considered to be zero. */
#if WANT_ASSERT
#define ASSERT_MPN_ZERO_P(ptr,size)             \
  do {                                          \
    mp_size_t  __i;                             \
    ASSERT ((size) >= 0);                       \
    for (__i = 0; __i < (size); __i++)          \
      ASSERT ((ptr)[__i] == 0);                 \
  } while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)          \
  do {                                          \
    mp_size_t  __i;                             \
    int        __nonzero = 0;                   \
    ASSERT ((size) >= 0);                       \
    for (__i = 0; __i < (size); __i++)          \
      if ((ptr)[__i] != 0)                      \
        {                                       \
          __nonzero = 1;                        \
          break;                                \
        }                                       \
    ASSERT (__nonzero);                         \
  } while (0)
#else
#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
#endif
#if HAVE_NATIVE_mpn_com_n
#define mpn_com_n __MPN(com_n)
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#else
#define mpn_com_n(d,s,n)                                \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s = (s);                               \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));    \
    do                                                  \
      *__d++ = (~ *__s++) & GMP_NUMB_MASK;              \
    while (--__n);                                      \
  } while (0)
#endif

#define MPN_LOGOPS_N_INLINE(d, s1, s2, n, operation)    \
  do {                                                  \
    mp_ptr     __d = (d);                               \
    mp_srcptr  __s1 = (s1);                             \
    mp_srcptr  __s2 = (s2);                             \
    mp_size_t  __n = (n);                               \
    ASSERT (__n >= 1);                                  \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s1, __n));   \
    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s2, __n));   \
    do                                                  \
      operation;                                        \
    while (--__n);                                      \
  } while (0)
#if HAVE_NATIVE_mpn_and_n
#define mpn_and_n __MPN(and_n)
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_and_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & *__s2++)
#endif

#if HAVE_NATIVE_mpn_andn_n
#define mpn_andn_n __MPN(andn_n)
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_andn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ & ~*__s2++)
#endif

#if HAVE_NATIVE_mpn_nand_n
#define mpn_nand_n __MPN(nand_n)
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nand_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ & *__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_ior_n
#define mpn_ior_n __MPN(ior_n)
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_ior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ | *__s2++)
#endif

#if HAVE_NATIVE_mpn_iorn_n
#define mpn_iorn_n __MPN(iorn_n)
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_iorn_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = (*__s1++ | ~*__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_nior_n
#define mpn_nior_n __MPN(nior_n)
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_nior_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ | *__s2++) & GMP_NUMB_MASK)
#endif

#if HAVE_NATIVE_mpn_xor_n
#define mpn_xor_n __MPN(xor_n)
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = *__s1++ ^ *__s2++)
#endif

#if HAVE_NATIVE_mpn_xnor_n
#define mpn_xnor_n __MPN(xnor_n)
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#else
#define mpn_xnor_n(d, s1, s2, n) \
  MPN_LOGOPS_N_INLINE (d, s1, s2, n, *__d++ = ~(*__s1++ ^ *__s2++) & GMP_NUMB_MASK)
#endif
/* ADDC_LIMB sets w=x+y, and cout to 0 or 1 for a carry from that addition. */
#if GMP_NAIL_BITS == 0
#define ADDC_LIMB(cout, w, x, y) \
  do { \
    mp_limb_t __x = (x); \
    mp_limb_t __y = (y); \
    mp_limb_t __w = __x + __y; \
    (w) = __w; \
    (cout) = __w < __x; \
  } while (0)
#else
#define ADDC_LIMB(cout, w, x, y) \
  do { \
    mp_limb_t __w; \
    ASSERT_LIMB (x); \
    ASSERT_LIMB (y); \
    __w = (x) + (y); \
    (w) = __w & GMP_NUMB_MASK; \
    (cout) = __w >> GMP_NUMB_BITS; \
  } while (0)
#endif

/* SUBC_LIMB sets w=x-y, and cout to 0 or 1 for a borrow from that
   subtraction. */
#if GMP_NAIL_BITS == 0
#define SUBC_LIMB(cout, w, x, y) \
  do { \
    mp_limb_t __x = (x); \
    mp_limb_t __y = (y); \
    mp_limb_t __w = __x - __y; \
    (w) = __w; \
    (cout) = __w > __x; \
  } while (0)
#else
#define SUBC_LIMB(cout, w, x, y) \
  do { \
    mp_limb_t __w = (x) - (y); \
    (w) = __w & GMP_NUMB_MASK; \
    (cout) = __w >> (GMP_LIMB_BITS-1); \
  } while (0)
#endif
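/* Usage sketch (illustrative; x0,x1,y0,y1 are hypothetical operand limbs):
   a two-limb add with explicit carry propagation,

       mp_limb_t w0, w1, c;
       ADDC_LIMB (c, w0, x0, y0);
       w1 = x1 + y1 + c;            assuming the top cannot carry out

   In the nail-free case the carry test is just the usual unsigned
   wraparound check __w < __x seen above. */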
/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
   expecting no carry (or borrow) out of the top of that region.

   The size parameter is only for the benefit of assertion checking.  In a
   normal build it's unused and the carry/borrow is just propagated as far
   as it needs to go.

   On random data, usually only one or two limbs of {ptr,size} get updated,
   so there's no need for any sophisticated looping, just something compact
   and sensible.

   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
   declaring their operand sizes, then remove the former.  This is purely
   for the benefit of assertion checking. */
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
  && BITS_PER_MP_LIMB == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
/* Better flags handling than the generic C gives on i386, saving a few
   bytes of code and maybe a cycle or two. */

#define MPN_IORD_U(ptr, incr, aors) \
  do { \
    mp_ptr __ptr_dummy; \
    if (__builtin_constant_p (incr) && (incr) == 1) \
      { \
        __asm__ __volatile__ \
          ("\n" ASM_L(top) ":\n" \
           "\t" aors " $1, (%0)\n" \
           "\tleal 4(%0),%0\n" \
           "\tjc " ASM_L(top) \
           : "=r" (__ptr_dummy) \
           : "0" (ptr) \
           : "memory"); \
      } \
    else \
      { \
        __asm__ __volatile__ \
          ( aors " %2,(%0)\n" \
           "\tjnc " ASM_L(done) "\n" \
           ASM_L(top) ":\n" \
           "\t" aors " $1,4(%0)\n" \
           "\tleal 4(%0),%0\n" \
           "\tjc " ASM_L(top) "\n" \
           ASM_L(done) ":\n" \
           : "=r" (__ptr_dummy) \
           : "0" (ptr), \
             "ri" (incr) \
           : "memory"); \
      } \
  } while (0)
1873 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
1874 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
1875 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
#define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
#endif
1879 #if GMP_NAIL_BITS == 0
1881 #define mpn_incr_u(p,incr) \
1885 if (__builtin_constant_p (incr) && (incr) == 1) \
1887 while (++(*(__p++)) == 0) \
1892 __x = *__p + (incr); \
1895 while (++(*(++__p)) == 0) \
1901 #define mpn_decr_u(p,incr) \
1905 if (__builtin_constant_p (incr) && (incr) == 1) \
1907 while ((*(__p++))-- == 0) \
1913 *__p = __x - (incr); \
1915 while ((*(++__p))-- == 0) \
1922 #if GMP_NAIL_BITS >= 1
1924 #define mpn_incr_u(p,incr) \
1928 if (__builtin_constant_p (incr) && (incr) == 1) \
1932 __x = (*__p + 1) & GMP_NUMB_MASK; \
1939 __x = (*__p + (incr)); \
1940 *__p++ = __x & GMP_NUMB_MASK; \
1941 if (__x >> GMP_NUMB_BITS != 0) \
1945 __x = (*__p + 1) & GMP_NUMB_MASK; \
1954 #define mpn_decr_u(p,incr) \
1958 if (__builtin_constant_p (incr) && (incr) == 1) \
1963 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
1969 __x = *__p - (incr); \
1970 *__p++ = __x & GMP_NUMB_MASK; \
1971 if (__x >> GMP_NUMB_BITS != 0) \
1976 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
#if WANT_ASSERT
#define MPN_INCR_U(ptr, size, n) \
  do { \
    ASSERT ((size) >= 1); \
    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
  } while (0)
#else
#define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
#endif

#if WANT_ASSERT
#define MPN_DECR_U(ptr, size, n) \
  do { \
    ASSERT ((size) >= 1); \
    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
  } while (0)
#else
#define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
#endif
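/* Usage sketch (illustrative; wp, wsize, cy are hypothetical): a typical
   caller adds a carry limb into a high region it knows can absorb it,

       cy = mpn_add_n (wp, xp, yp, n);
       MPN_INCR_U (wp + n, wsize - n, cy);

   where a WANT_ASSERT build verifies via mpn_add_1 that nothing carries
   out of the declared region. */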
/* Structure for conversion between internal binary format and
   strings in base 2..36. */
struct bases
{
  /* Number of digits in the conversion base that always fits in an mp_limb_t.
     For example, for base 10 on a machine where an mp_limb_t has 32 bits this
     is 9, since 10**9 is the largest number that fits into an mp_limb_t. */
  int chars_per_limb;

  /* log(2)/log(conversion_base) */
  double chars_per_bit_exactly;

  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
     i.e. the number of bits used to represent each digit in the base. */
  mp_limb_t big_base;

  /* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
     fixed-point number.  Instead of dividing by big_base an application can
     choose to multiply by big_base_inverted. */
  mp_limb_t big_base_inverted;
};
2033 #define mp_bases __MPN(bases)
2034 #define __mp_bases __MPN(bases)
2035 __GMP_DECLSPEC extern const struct bases mp_bases[257];
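/* Example (illustrative only): converting to decimal one limb-sized chunk
   at a time can use the precomputed fields, on the pattern of

       const struct bases *b = &mp_bases[10];
       mp_limb_t chunk = mpn_divrem_1 (np, (mp_size_t) 0, np, nsize, b->big_base);
       then print "chunk" as b->chars_per_limb decimal digits

   so the expensive division happens once per chars_per_limb digits rather
   than once per digit. */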
2038 /* For power of 2 bases this is exact. For other bases the result is either
2039 exact or one too big.
2041 To be exact always it'd be necessary to examine all the limbs of the
2042 operand, since numbers like 100..000 and 99...999 generally differ only
2043 in the lowest limb. It'd be possible to examine just a couple of high
2044 limbs to increase the probability of being exact, but that doesn't seem
2045 worth bothering with. */
#define MPN_SIZEINBASE(result, ptr, size, base) \
  do { \
    int __lb_base, __cnt; \
    size_t __totbits; \
    ASSERT ((size) >= 0); \
    ASSERT ((base) >= 2); \
    ASSERT ((base) < numberof (mp_bases)); \
    /* Special case for X == 0. */ \
    if ((size) == 0) \
      (result) = 1; \
    else \
      { \
        /* Calculate the total number of significant bits of X. */ \
        count_leading_zeros (__cnt, (ptr)[(size)-1]); \
        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
        if (POW2_P (base)) \
          { \
            __lb_base = mp_bases[base].big_base; \
            (result) = (__totbits + __lb_base - 1) / __lb_base; \
          } \
        else \
          (result) = (size_t) \
            (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \
      } \
  } while (0)

/* eliminate mp_bases lookups for base==16 */
#define MPN_SIZEINBASE_16(result, ptr, size) \
  do { \
    int __cnt; \
    mp_size_t __totbits; \
    ASSERT ((size) >= 0); \
    /* Special case for X == 0. */ \
    if ((size) == 0) \
      (result) = 1; \
    else \
      { \
        /* Calculate the total number of significant bits of X. */ \
        count_leading_zeros (__cnt, (ptr)[(size)-1]); \
        __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
        (result) = (__totbits + 4 - 1) / 4; \
      } \
  } while (0)
2096 /* bit count to limb count, rounding up */
2097 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2099 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2100 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2101 in both cases (LIMBS_PER_ULONG is also defined here.) */
#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */

#define LIMBS_PER_ULONG 1
#define MPN_SET_UI(zp, zn, u) \
  (zp)[0] = (u); \
  (zn) = ((zp)[0] != 0);
#define MPZ_FAKE_UI(z, zp, u) \
  (zp)[0] = (u); \
  PTR (z) = (zp); \
  SIZ (z) = ((zp)[0] != 0); \
  ASSERT_CODE (ALLOC (z) = 1);

#else /* need two limbs per ulong */

#define LIMBS_PER_ULONG 2
#define MPN_SET_UI(zp, zn, u) \
  (zp)[0] = (u) & GMP_NUMB_MASK; \
  (zp)[1] = (u) >> GMP_NUMB_BITS; \
  (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
#define MPZ_FAKE_UI(z, zp, u) \
  (zp)[0] = (u) & GMP_NUMB_MASK; \
  (zp)[1] = (u) >> GMP_NUMB_BITS; \
  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
  PTR (z) = (zp); \
  ASSERT_CODE (ALLOC (z) = 2);

#endif
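/* Usage sketch (illustrative): MPZ_FAKE_UI builds a read-only mpz_t view
   of an unsigned long without allocating,

       mp_limb_t zlimbs[LIMBS_PER_ULONG];
       mpz_t z;
       MPZ_FAKE_UI (z, zlimbs, u);
       ... pass z to an mpz routine that won't write to it ...

   which is why the macros above also set PTR and, under ASSERT_CODE, a
   matching ALLOC.  The fake must never be passed anywhere that might
   realloc it. */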
#if HAVE_HOST_CPU_FAMILY_x86
#define TARGET_REGISTER_STARVED 1
#else
#define TARGET_REGISTER_STARVED 0
#endif
2138 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2139 or 0 there into a limb 0xFF..FF or 0 respectively.
2141 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2142 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2143 a little compile-time test and a fallback to a "? :" form. The latter is
2144 necessary for instance on Cray vector systems.
2146 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2147 to an arithmetic right shift anyway, but it's good to get the desired
2148 shift on past versions too (in particular since an important use of
2149 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
2151 #define LIMB_HIGHBIT_TO_MASK(n) \
2152 (((mp_limb_signed_t) -1 >> 1) < 0 \
2153 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
2154 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
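/* Example (illustrative): LIMB_HIGHBIT_TO_MASK converts the top bit of a
   subtraction into an all-ones or all-zeros mask for a branch-free addback,

       mask = LIMB_HIGHBIT_TO_MASK (x - y);
       r = (x - y) + (d & mask);

   ie. d is added back only when the subtraction left the high bit set,
   which is the pattern udiv_qrnnd_preinv2 below uses via _nmask. */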
/* Use a library function for invert_limb, if available. */
#define mpn_invert_limb __MPN(invert_limb)
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define invert_limb(invxl,xl) \
  do { \
    (invxl) = mpn_invert_limb (xl); \
  } while (0)
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl) \
  do { \
    mp_limb_t dummy; \
    ASSERT ((xl) != 0); \
    udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl); \
  } while (0)
#endif

#ifndef udiv_qrnnd_preinv
#define udiv_qrnnd_preinv udiv_qrnnd_preinv2
#endif
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
   limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
   If this would yield overflow, DI should be the largest possible number
   (i.e., only ones).  For correct operation, the most significant bit of D
   has to be set.  Put the quotient in Q and the remainder in R. */
#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \
  do { \
    mp_limb_t _q, _ql, _r; \
    mp_limb_t _xh, _xl; \
    ASSERT ((d) != 0); \
    umul_ppmm (_q, _ql, (nh), (di)); \
    _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \
    umul_ppmm (_xh, _xl, _q, (d)); \
    sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
    if (_xh != 0) \
      { \
        sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
        _q += 1; \
        if (_xh != 0) \
          { \
            _r -= (d); \
            _q += 1; \
          } \
      } \
    if (_r >= (d)) \
      { \
        _r -= (d); \
        _q += 1; \
      } \
    (r) = _r; \
    (q) = _q; \
  } while (0)
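/* Worked example (illustrative, 32-bit limbs, nail-free): for
   d = 0x80000003,

       di = floor(2^64 / d) - 2^32 = (2^33 - 12) - 2^32 = 0xFFFFFFF4

   and the code above forms the estimate _q = nh + high(nh * di), roughly
   nh * 2^32 / d, after which the remainder corrections bring r into
   [0,d) while bumping the quotient at most a couple of times. */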
/* Like udiv_qrnnd_preinv, but branch-free. */
#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \
  do { \
    mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
    mp_limb_t _xh, _xl; \
    _n2 = (nh); \
    _n10 = (nl); \
    _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
    _nadj = _n10 + (_nmask & (d)); \
    umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
    _q1 = ~_xh; \
    umul_ppmm (_xh, _xl, _q1, d); \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
    _xh -= (d); /* xh = 0 or -1 */ \
    (r) = _xl + ((d) & _xh); \
    (q) = _xh - _q1; \
  } while (0)
/* Like udiv_qrnnd_preinv2, but for any value D.  DNORM is D shifted left
   so that its most significant bit is set.  LGUP is ceil(log2(D)). */
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
  do { \
    mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
    mp_limb_t _xh, _xl; \
    _n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> ((lgup) - 1));\
    _n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
    _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
    _nadj = _n10 + (_nmask & (dnorm)); \
    umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
    add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
    _q1 = ~_xh; \
    umul_ppmm (_xh, _xl, _q1, d); \
    add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
    _xh -= (d); \
    (r) = _xl + ((d) & _xh); \
    (q) = _xh - _q1; \
  } while (0)
#ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
#define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
#endif

/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to
   the plain mpn_divrem_1.  Likewise USE_PREINV_MOD_1 chooses between
   mpn_preinv_mod_1 and plain mpn_mod_1.  The default for both is yes, since
   the few CISC chips where preinv is not good have defines saying so. */
#ifndef USE_PREINV_DIVREM_1
#define USE_PREINV_DIVREM_1 1
#endif
#ifndef USE_PREINV_MOD_1
#define USE_PREINV_MOD_1 1
#endif

#if USE_PREINV_DIVREM_1
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
#else
#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
  mpn_divrem_1 (qp, xsize, ap, size, d)
#endif

#if USE_PREINV_MOD_1
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
  mpn_preinv_mod_1 (src, size, divisor, inverse)
#else
#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
  mpn_mod_1 (src, size, divisor)
#endif

#ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
mp_limb_t mpn_mod_34lsub1 _PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
#endif
2293 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2294 plain mpn_divrem_1. Likewise MODEXACT_1_ODD_THRESHOLD for
2295 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
2296 modexact are faster at all sizes, so the defaults are 0. Those CPUs
2297 where this is not right have a tuned threshold. */
#ifndef DIVEXACT_1_THRESHOLD
#define DIVEXACT_1_THRESHOLD 0
#endif
#ifndef MODEXACT_1_ODD_THRESHOLD
#define MODEXACT_1_ODD_THRESHOLD 0
#endif

#ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
#define mpn_divexact_1 __MPN(divexact_1)
void mpn_divexact_1 _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#endif

#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \
  do { \
    if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \
      ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
    else \
      { \
        ASSERT (mpn_mod_1 (src, size, divisor) == 0); \
        mpn_divexact_1 (dst, src, size, divisor); \
      } \
  } while (0)

#ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
mp_limb_t mpn_modexact_1c_odd _PROTO ((mp_srcptr src, mp_size_t size,
                                       mp_limb_t divisor, mp_limb_t c)) __GMP_ATTRIBUTE_PURE;
#endif

#if HAVE_NATIVE_mpn_modexact_1_odd
#define mpn_modexact_1_odd __MPN(modexact_1_odd)
mp_limb_t mpn_modexact_1_odd _PROTO ((mp_srcptr src, mp_size_t size,
                                      mp_limb_t divisor)) __GMP_ATTRIBUTE_PURE;
#else
#define mpn_modexact_1_odd(src,size,divisor) \
  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
#endif
2336 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
2337 (ABOVE_THRESHOLD (size, MODEXACT_1_ODD_THRESHOLD) \
2338 ? mpn_modexact_1_odd (src, size, divisor) \
2339 : mpn_mod_1 (src, size, divisor))
2342 /* modlimb_invert() sets inv to the multiplicative inverse of n modulo
2343 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2344 n must be odd (otherwise such an inverse doesn't exist).
   This is not to be confused with invert_limb(), which is completely
   different.
2349 The table lookup gives an inverse with the low 8 bits valid, and each
2350 multiply step doubles the number of bits. See Jebelean "An algorithm for
2351 exact division" end of section 4 (reference in gmp.texi).
2353 Possible enhancement: Could use UHWtype until the last step, if half-size
2354 multiplies are faster (might help under _LONG_LONG_LIMB).
2356 Alternative: As noted in Granlund and Montgomery "Division by Invariant
2357 Integers using Multiplication" (reference in gmp.texi), n itself gives a
2358 3-bit inverse immediately, and could be used instead of a table lookup.
2359 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2360 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
2362 #define modlimb_invert_table __gmp_modlimb_invert_table
2363 __GMP_DECLSPEC extern const unsigned char modlimb_invert_table[128];
#define modlimb_invert(inv,n) \
  do { \
    mp_limb_t __n = (n); \
    mp_limb_t __inv; \
    ASSERT ((__n & 1) == 1); \
    __inv = modlimb_invert_table[(__n/2) & 0x7F]; /* 8 */ \
    if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \
    if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \
    if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \
    if (GMP_NUMB_BITS > 64) \
      { \
        int __invbits = 64; \
        do { \
          __inv = 2 * __inv - __inv * __inv * __n; \
          __invbits *= 2; \
        } while (__invbits < GMP_NUMB_BITS); \
      } \
    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
    (inv) = __inv & GMP_NUMB_MASK; \
  } while (0)
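/* Worked example (illustrative, 32-bit limbs): for n = 3 the table gives
   an 8-bit inverse 0xAB (3 * 0xAB = 0x201 == 1 mod 2^8).  Two Newton
   steps then lift it:

       inv = 2*0xAB   - 0xAB*0xAB*3     = 0xAAAB      (1 mod 2^16)
       inv = 2*0xAAAB - 0xAAAB*0xAAAB*3 = 0xAAAAAAAB  (1 mod 2^32)

   matching MODLIMB_INVERSE_3 below; each step doubles the number of
   correct low bits. */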
2389 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2390 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2391 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2392 we need to start from GMP_NUMB_MAX>>1. */
2393 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2395 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2396 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2397 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
2398 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.

   It's not clear whether this is the best way to do this calculation.
   Anything congruent to -a would be fine for the one limb congruence
   tests. */

#define NEG_MOD(r, a, d) \
  do { \
    ASSERT ((d) != 0); \
    ASSERT_LIMB (a); \
    ASSERT_LIMB (d); \
    if ((a) <= (d)) \
      { \
        /* small a is reasonably likely */ \
        (r) = (d) - (a); \
      } \
    else \
      { \
        unsigned __twos; \
        mp_limb_t __dnorm; \
        count_leading_zeros (__twos, d); \
        __twos -= GMP_NAIL_BITS; \
        __dnorm = (d) << __twos; \
        (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
      } \
    ASSERT_LIMB (r); \
  } while (0)
2431 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2432 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
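/* Eg. LOW_ZEROS_MASK (0x14) = 0x3: n & -n isolates the lowest set bit
   (0x4 here) and subtracting 1 turns it into a mask of the zero bits
   below it; n == 0 gives a mask of all ones. */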
/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
   to 0 if there's an even number.  "n" should be an unsigned long and "p"
   an int. */

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
#define ULONG_PARITY(p, n) \
  do { \
    int __p; \
    __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
    (p) = __p & 1; \
  } while (0)
#endif

/* Cray intrinsic _popcnt. */
#ifdef _CRAY
#define ULONG_PARITY(p, n) \
  do { \
    (p) = _popcnt (n) & 1; \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
  && ! defined (NO_ASM) && defined (__ia64)
/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
   to a 64 bit unsigned long long for popcnt */
#define ULONG_PARITY(p, n) \
  do { \
    unsigned long long __n = (unsigned long) (n); \
    int __p; \
    __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
    (p) = __p & 1; \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
#if __GMP_GNUC_PREREQ (3,1)
#define __GMP_qm "=Qm"
#define __GMP_q "=Q"
#else
#define __GMP_qm "=qm"
#define __GMP_q "=q"
#endif
#define ULONG_PARITY(p, n) \
  do { \
    char __p; \
    unsigned long __n = (n); \
    __n ^= (__n >> 16); \
    __asm__ ("xorb %h1, %b1\n\t" \
             "setpo %0" \
             : __GMP_qm (__p), __GMP_q (__n) \
             : "1" (__n)); \
    (p) = __p; \
  } while (0)
#endif

#if ! defined (ULONG_PARITY)
#define ULONG_PARITY(p, n) \
  do { \
    unsigned long __n = (n); \
    int __p = 0; \
    do \
      { \
        __p ^= 0x96696996L >> (__n & 0x1F); \
        __n >>= 5; \
      } \
    while (__n != 0); \
    (p) = __p & 1; \
  } while (0)
#endif
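/* Why the generic version works (illustrative note): bit i of the constant
   0x96696996 is the parity of i, for i = 0..31, so each 5-bit chunk of __n
   selects its own parity bit and the XORs accumulate them in bit 0.  Eg.
   n = 0x21 = 100001 binary gives chunks 1 and 1, hence 1 XOR 1 = 0, and
   indeed 0x21 has an even number of set bits. */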
2507 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
2508 version 3.1 at least) doesn't seem to know how to generate rlwimi for
2509 anything other than bit-fields, so use "asm". */
#if defined (__GNUC__) && ! defined (NO_ASM) \
  && HAVE_HOST_CPU_FAMILY_powerpc && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    mp_limb_t __bswapl_src = (src); \
    mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
    mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
    __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
             : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
    __asm__ ("rlwimi %0, %2,  8,  8, 15" /* 3rd high */ \
             : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
    (dst) = __tmp1 | __tmp2; /* whole */ \
  } while (0)
#endif
2525 /* bswap is available on i486 and up and is fast. A combination rorw $8 /
2526 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
2527 kernel with xchgb instead of rorw), but this is not done here, because
2528 i386 means generic x86 and mixing word and dword operations will cause
2529 partial register stalls on P6 chips. */
#if defined (__GNUC__) && ! defined (NO_ASM) \
  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
  && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) \
  && defined (__amd64__) && BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
  && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) \
  && HAVE_HOST_CPU_FAMILY_m68k && BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    mp_limb_t __bswapl_src = (src); \
    __asm__ ("ror%.w %#8, %0\n\t" \
             "swap   %0\n\t" \
             "ror%.w %#8, %0" \
             : "=d" (dst) \
             : "0" (__bswapl_src)); \
  } while (0)
#endif
#if ! defined (BSWAP_LIMB)
#if BITS_PER_MP_LIMB == 8
#define BSWAP_LIMB(dst, src) \
  do { (dst) = (src); } while (0)
#endif
#if BITS_PER_MP_LIMB == 16
#define BSWAP_LIMB(dst, src) \
  do { \
    (dst) = ((src) << 8) + ((src) >> 8); \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    (dst) = \
      ((src) << 24) \
      + (((src) & 0xFF00) << 8) \
      + (((src) >> 8) & 0xFF00) \
      + ((src) >> 24); \
  } while (0)
#endif
#if BITS_PER_MP_LIMB == 64
#define BSWAP_LIMB(dst, src) \
  do { \
    (dst) = \
      ((src) << 56) \
      + (((src) & 0xFF00) << 40) \
      + (((src) & 0xFF0000) << 24) \
      + (((src) & 0xFF000000) << 8) \
      + (((src) >> 8) & 0xFF000000) \
      + (((src) >> 24) & 0xFF0000) \
      + (((src) >> 40) & 0xFF00) \
      + ((src) >> 56); \
  } while (0)
#endif
#endif

#if ! defined (BSWAP_LIMB)
#define BSWAP_LIMB(dst, src) \
  do { \
    mp_limb_t __bswapl_src = (src); \
    mp_limb_t __dst = 0; \
    int __i; \
    for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
      { \
        __dst = (__dst << 8) | (__bswapl_src & 0xFF); \
        __bswapl_src >>= 8; \
      } \
    (dst) = __dst; \
  } while (0)
#endif
2622 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
2623 those we know are fast. */
2624 #if defined (__GNUC__) && ! defined (NO_ASM) \
2625 && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN \
2626 && (HAVE_HOST_CPU_powerpc604 \
2627 || HAVE_HOST_CPU_powerpc604e \
2628 || HAVE_HOST_CPU_powerpc750 \
2629 || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src) \
  do { \
    mp_srcptr __blf_src = (src); \
    mp_limb_t __limb; \
    __asm__ ("lwbrx %0, 0, %1" \
             : "=r" (__limb) \
             : "r" (__blf_src), \
               "m" (*__blf_src)); \
    (limb) = __limb; \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_FETCH)
#define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
#endif
2647 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
2648 know are fast. FIXME: Is this necessary? */
2649 #if defined (__GNUC__) && ! defined (NO_ASM) \
2650 && BITS_PER_MP_LIMB == 32 && HAVE_LIMB_BIG_ENDIAN \
2651 && (HAVE_HOST_CPU_powerpc604 \
2652 || HAVE_HOST_CPU_powerpc604e \
2653 || HAVE_HOST_CPU_powerpc750 \
2654 || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb) \
  do { \
    mp_ptr __dst = (dst); \
    mp_limb_t __limb = (limb); \
    __asm__ ("stwbrx %1, 0, %2" \
             : "=m" (*__dst) \
             : "r" (__limb), \
               "r" (__dst)); \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_STORE)
#define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
#endif
/* Byte swap limbs from {src,size} and store at {dst,size}. */
#define MPN_BSWAP(dst, src, size) \
  do { \
    mp_ptr __dst = (dst); \
    mp_srcptr __src = (src); \
    mp_size_t __size = (size); \
    mp_size_t __i; \
    ASSERT ((size) >= 0); \
    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
    CRAY_Pragma ("_CRI ivdep"); \
    for (__i = 0; __i < __size; __i++) \
      { \
        BSWAP_LIMB_FETCH (*__dst, __src); \
        __dst++; \
        __src++; \
      } \
  } while (0)

/* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
#define MPN_BSWAP_REVERSE(dst, src, size) \
  do { \
    mp_ptr __dst = (dst); \
    mp_size_t __size = (size); \
    mp_srcptr __src = (src) + __size - 1; \
    mp_size_t __i; \
    ASSERT ((size) >= 0); \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
    CRAY_Pragma ("_CRI ivdep"); \
    for (__i = 0; __i < __size; __i++) \
      { \
        BSWAP_LIMB_FETCH (*__dst, __src); \
        __dst++; \
        __src--; \
      } \
  } while (0)
/* No processor claiming to be SPARC v9 compliant seems to
   implement the POPC instruction.  Disable pattern for now. */
#if 0
#if defined __GNUC__ && defined __sparc_v9__ && BITS_PER_MP_LIMB == 64
#define popc_limb(result, input) \
  do { \
    __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
  } while (0)
#endif
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
#define popc_limb(result, input) \
  do { \
    __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
  } while (0)
#endif

/* Cray intrinsic. */
#ifdef _CRAY
#define popc_limb(result, input) \
  do { \
    (result) = _popcnt (input); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
  && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
#define popc_limb(result, input) \
  do { \
    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
  } while (0)
#endif
2743 /* Cool population count of an mp_limb_t.
   You have to figure out how this works; we won't tell you!
2746 The constants could also be expressed as:
2747 0x55... = [2^N / 3] = [(2^N-1)/3]
2748 0x33... = [2^N / 5] = [(2^N-1)/5]
2749 0x0f... = [2^N / 17] = [(2^N-1)/17]
2750 (N is GMP_LIMB_BITS, [] denotes truncation.) */
#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
    (result) = __x & 0xff; \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
    if (GMP_LIMB_BITS > 8) \
      __x = ((__x >> 8) + __x); \
    (result) = __x & 0xff; \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
    if (GMP_LIMB_BITS > 8) \
      __x = ((__x >> 8) + __x); \
    if (GMP_LIMB_BITS > 16) \
      __x = ((__x >> 16) + __x); \
    (result) = __x & 0xff; \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
    if (GMP_LIMB_BITS > 8) \
      __x = ((__x >> 8) + __x); \
    if (GMP_LIMB_BITS > 16) \
      __x = ((__x >> 16) + __x); \
    if (GMP_LIMB_BITS > 32) \
      __x = ((__x >> 32) + __x); \
    (result) = __x & 0xff; \
  } while (0)
#endif
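/* How the masking trick works (illustrative sketch): each step sums bit
   counts in fields twice the previous width.  Eg. for one byte
   x = 0xD5 (11010101, five bits set):

       x -= (x >> 1) & 0x55;                 2-bit fields:  10 01 01 01
       x = ((x >> 2) & 0x33) + (x & 0x33);   4-bit fields:  0011 0010
       x = ((x >> 4) + x) & 0x0F;            total 5

   MP_LIMB_T_MAX/3, /5 and /17 generate the 0x55.., 0x33.. and 0x0f..
   masks at whatever the limb width is. */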
/* Define stuff for longlong.h. */
#if HAVE_ATTRIBUTE_MODE
typedef unsigned int UQItype __attribute__ ((mode (QI)));
typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef long SItype;
typedef unsigned long USItype;
#if HAVE_LONG_LONG
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w. */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif
2829 typedef mp_limb_t UWtype;
2830 typedef unsigned int UHWtype;
2831 #define W_TYPE_SIZE BITS_PER_MP_LIMB
2833 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
2835 Bit field packing is "implementation defined" according to C99, which
2836 leaves us at the compiler's mercy here. For some systems packing is
2837 defined in the ABI (eg. x86). In any case so far it seems universal that
2838 little endian systems pack from low to high, and big endian from high to
2839 low within the given type.
2841 Within the fields we rely on the integer endianness being the same as the
2842 float endianness, this is true everywhere we know of and it'd be a fairly
2843 strange system that did anything else. */
#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      gmp_uint_least32_t manh:20;
      gmp_uint_least32_t exp:11;
      gmp_uint_least32_t sig:1;
      gmp_uint_least32_t manl:32;
    } s;
  double d;
};
#endif

#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      gmp_uint_least32_t manl:32;
      gmp_uint_least32_t manh:20;
      gmp_uint_least32_t exp:11;
      gmp_uint_least32_t sig:1;
    } s;
  double d;
};
#endif

#if HAVE_DOUBLE_IEEE_BIG_ENDIAN
#define _GMP_IEEE_FLOATS 1
union ieee_double_extract
{
  struct
    {
      gmp_uint_least32_t sig:1;
      gmp_uint_least32_t exp:11;
      gmp_uint_least32_t manh:20;
      gmp_uint_least32_t manl:32;
    } s;
  double d;
};
#endif
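/* Example (illustrative): with any of the layouts above,

       union ieee_double_extract u;
       u.d = 1.5;

   leaves u.s.sig == 0, u.s.exp == 1023 (the bias plus exponent 0) and the
   fraction 0.5 as the top bit of u.s.manh (0x80000), since 1.5 is 1.1 in
   binary. */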
2891 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
2892 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
2893 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
2894 /* Maximum number of limbs it will take to store any `double'.
   We assume doubles have 53 mantissa bits. */
2896 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS + 1)
2898 int __gmp_extract_double _PROTO ((mp_ptr, double));
2900 #define mpn_get_d __gmpn_get_d
2901 double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
2904 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
2905 a_inf if x is an infinity. Both are considered unlikely values, for
2906 branch prediction. */
#if _GMP_IEEE_FLOATS
#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
  do { \
    union ieee_double_extract u; \
    u.d = (x); \
    if (UNLIKELY (u.s.exp == 0x7FF)) \
      { \
        if (u.s.manl == 0 && u.s.manh == 0) \
          { a_inf; } \
        else \
          { a_nan; } \
      } \
  } while (0)
#endif

#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
/* no nans or infs in these formats */
#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
  do { } while (0)
#endif

#ifndef DOUBLE_NAN_INF_ACTION
/* Unknown format, try something generic.
   NaN should be "unordered", so x!=x.
   Inf should be bigger than DBL_MAX. */
#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
  do { \
    if (UNLIKELY ((x) != (x))) \
      { a_nan; } \
    else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
      { a_inf; } \
  } while (0)
#endif
2944 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
2945 in the coprocessor, which means a bigger exponent range than normal, and
2946 depending on the rounding mode, a bigger mantissa than normal. (See
2947 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
2948 "d" through memory to force any rounding and overflows to occur.
2950 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
2951 registers, where there's no such extra precision and no need for the
2952 FORCE_DOUBLE. We don't bother to detect this since the present uses for
2953 FORCE_DOUBLE are only in test programs and default generic C code.
2955 Not quite sure that an "automatic volatile" will use memory, but it does
2956 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
2957 apparently matching operands like "0" are only allowed on a register
2958 output. gcc 3.4 warns about this, though in fact it and past versions
2959 seem to put the operand through memory as hoped. */
2961 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
2962 || defined (__amd64__))
#define FORCE_DOUBLE(d) \
  do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
#else
#define FORCE_DOUBLE(d) do { } while (0)
#endif
2970 extern int __gmp_junk;
2971 extern const int __gmp_0;
2972 void __gmp_exception _PROTO ((int)) ATTRIBUTE_NORETURN;
2973 void __gmp_divide_by_zero _PROTO ((void)) ATTRIBUTE_NORETURN;
2974 void __gmp_sqrt_of_negative _PROTO ((void)) ATTRIBUTE_NORETURN;
2975 void __gmp_invalid_operation _PROTO ((void)) ATTRIBUTE_NORETURN;
2976 #define GMP_ERROR(code) __gmp_exception (code)
2977 #define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
#define INVALID_OPERATION __gmp_invalid_operation ()
#if defined _LONG_LONG_LIMB
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
#endif
#else /* not _LONG_LONG_LIMB */
#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
#else
#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
#endif
#endif /* _LONG_LONG_LIMB */
2994 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
#if GMP_NUMB_BITS == 2
#define PP 0x3 /* 3 */
#define PP_FIRST_OMITTED 5
#endif
#if GMP_NUMB_BITS == 4
#define PP 0xF /* 3 x 5 */
#define PP_FIRST_OMITTED 7
#endif
#if GMP_NUMB_BITS == 8
#define PP 0x69 /* 3 x 5 x 7 */
#define PP_FIRST_OMITTED 11
#endif
#if GMP_NUMB_BITS == 16
#define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
#define PP_FIRST_OMITTED 17
#endif
#if GMP_NUMB_BITS == 32
#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
#define PP_INVERTED 0x53E5645CL
#define PP_FIRST_OMITTED 31
#endif
#if GMP_NUMB_BITS == 64
#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
#define PP_FIRST_OMITTED 59
#endif
#ifndef PP_FIRST_OMITTED
#define PP_FIRST_OMITTED 3
#endif
3027 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3028 zero bit representing +1 and a one bit representing -1. Bits other than
3029 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
   used to ensure the expressions are "int"s even if a and/or b might be
   other types.
3033 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3034 and their speed is important. Expressions are used rather than
3035 conditionals to accumulate sign changes, which effectively means XORs
3036 instead of conditional JUMPs. */
3038 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3039 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3041 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3042 #define JACOBI_U0(a) ((a) == 1)
3044 /* (a/0), with a given by low and size;
3045 is 1 if a=+/-1, 0 otherwise */
3046 #define JACOBI_LS0(alow,asize) \
3047 (((asize) == 1 || (asize) == -1) && (alow) == 1)
3049 /* (a/0), with a an mpz_t;
3050 fetch of low limb always valid, even if size is zero */
3051 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
3053 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3054 #define JACOBI_0U(b) ((b) == 1)
3056 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3057 #define JACOBI_0S(b) ((b) == 1 || (b) == -1)
3059 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3060 #define JACOBI_0LS(blow,bsize) \
3061 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3063 /* Convert a bit1 to +1 or -1. */
3064 #define JACOBI_BIT1_TO_PN(result_bit1) \
3065 (1 - ((int) (result_bit1) & 2))
3067 /* (2/b), with b unsigned and odd;
3068 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3069 hence obtained from (b>>1)^b */
3070 #define JACOBI_TWO_U_BIT1(b) \
3071 ((int) (((b) >> 1) ^ (b)))
3073 /* (2/b)^twos, with b unsigned and odd */
3074 #define JACOBI_TWOS_U_BIT1(twos, b) \
3075 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3077 /* (2/b)^twos, with b unsigned and odd */
3078 #define JACOBI_TWOS_U(twos, b) \
3079 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
3081 /* (-1/b), with b odd (signed or unsigned);
3082 is (-1)^((b-1)/2) */
#define JACOBI_N1B_BIT1(b) \
  ((int) (b))
3086 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3087 is (-1/b) if a<0, or +1 if a>=0 */
3088 #define JACOBI_ASGN_SU_BIT1(a, b) \
3089 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3091 /* (a/b) effect due to sign of b: signed/signed;
3092 is -1 if a and b both negative, +1 otherwise */
3093 #define JACOBI_BSGN_SS_BIT1(a, b) \
3094 ((((a)<0) & ((b)<0)) << 1)
3096 /* (a/b) effect due to sign of b: signed/mpz;
3097 is -1 if a and b both negative, +1 otherwise */
3098 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3099 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3101 /* (a/b) effect due to sign of b: mpz/signed;
3102 is -1 if a and b both negative, +1 otherwise */
3103 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3104 JACOBI_BSGN_SZ_BIT1 (b, a)
/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   known. */
#define JACOBI_RECIP_UU_BIT1(a, b) \
  ((int) ((a) & (b)))
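/* Usage sketch (illustrative, not the exact mpn_jacobi_base code): a binary
   Jacobi loop accumulates sign flips as

       int result_bit1 = 0;
       result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b);   factors of 2 stripped from a
       result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);    swap (a/b) to (b/a)
       ...
       return JACOBI_BIT1_TO_PN (result_bit1);

   all in XORs on bit 1, with a single conversion to +1 or -1 at the end. */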
3114 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3115 decrementing b_size. b_low should be b_ptr[0] on entry, and will be
3116 updated for the new b_ptr. result_bit1 is updated according to the
3117 factors of 2 stripped, as per (a/2). */
#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
  do { \
    ASSERT ((b_size) >= 1); \
    ASSERT ((b_low) == (b_ptr)[0]); \
    while (UNLIKELY ((b_low) == 0)) \
      { \
        (b_size)--; \
        ASSERT ((b_size) >= 1); \
        (b_ptr)++; \
        (b_low) = *(b_ptr); \
        ASSERT (((a) & 1) != 0); \
        if ((GMP_NUMB_BITS % 2) == 1) \
          (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
      } \
  } while (0)
3136 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3137 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and
3138 unsigned. modexact_1_odd effectively calculates -a mod b, and
3139 result_bit1 is adjusted for the factor of -1.
3141 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3142 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3143 factor to introduce into result_bit1, so for that case use mpn_mod_1
3146 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3147 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result,
3148 or not skip a divide step, or something. */
#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
  do { \
    mp_srcptr __a_ptr = (a_ptr); \
    mp_size_t __a_size = (a_size); \
    mp_limb_t __b = (b); \
    ASSERT (__a_size >= 1); \
    ASSERT (__b & 1); \
    if ((GMP_NUMB_BITS % 2) != 0 \
        || BELOW_THRESHOLD (__a_size, MODEXACT_1_ODD_THRESHOLD)) \
      { \
        (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
      } \
    else \
      { \
        (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
        (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
      } \
  } while (0)
3172 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3173 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
3174 hence giving back the user's size in bits rounded up. Notice that
3175 converting prec->bits->prec gives an unchanged value. */
3176 #define __GMPF_BITS_TO_PREC(n) \
3177 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3178 #define __GMPF_PREC_TO_BITS(n) \
3179 ((unsigned long) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
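/* Eg. with 32-bit limbs, __GMPF_BITS_TO_PREC(53) = (53 + 63)/32 = 3 limbs,
   and __GMPF_PREC_TO_BITS(3) = 3*32 - 32 = 64 bits, so a request for 53
   bits reads back as 64: rounded up to whole limbs, never less than asked
   for, and stable under a further round trip. */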
3181 extern mp_size_t __gmp_default_fp_limb_precision;
3184 /* Set n to the number of significant digits an mpf of the given _mp_prec
3185 field, in the given base. This is a rounded up value, designed to ensure
3186 there's enough digits to reproduce all the guaranteed part of the value.
3188 There are prec many limbs, but the high might be only "1" so forget it
3189 and just count prec-1 limbs into chars. +1 rounds that upwards, and a
3190 further +1 is because the limbs usually won't fall on digit boundaries.
3192 FIXME: If base is a power of 2 and the bits per digit divides
3193 BITS_PER_MP_LIMB then the +2 is unnecessary. This happens always for
3194 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
  do { \
    ASSERT (base >= 2 && base < numberof (mp_bases)); \
    (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS) \
                        * mp_bases[(base)].chars_per_bit_exactly); \
  } while (0)
/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
   their respective #if HAVE_FOO_H.

   GLIBC recommends nl_langinfo because getting only one facet can be
   faster, apparently. */

/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
#define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
#endif
/* RADIXCHAR is deprecated, still in unix98 or some such. */
#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
#endif
/* localeconv is slower since it returns all locale stuff */
#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT (localeconv()->decimal_point)
#endif
#if ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT (".")
#endif
3229 #define DOPRNT_CONV_FIXED 1
3230 #define DOPRNT_CONV_SCIENTIFIC 2
3231 #define DOPRNT_CONV_GENERAL 3
3233 #define DOPRNT_JUSTIFY_NONE 0
3234 #define DOPRNT_JUSTIFY_LEFT 1
3235 #define DOPRNT_JUSTIFY_RIGHT 2
3236 #define DOPRNT_JUSTIFY_INTERNAL 3
3238 #define DOPRNT_SHOWBASE_YES 1
3239 #define DOPRNT_SHOWBASE_NO 2
3240 #define DOPRNT_SHOWBASE_NONZERO 3
3242 struct doprnt_params_t {
3243 int base; /* negative for upper case */
3244 int conv; /* choices above */
3245 const char *expfmt; /* exponent format */
3246 int exptimes4; /* exponent multiply by 4 */
3247 char fill; /* character */
3248 int justify; /* choices above */
3249 int prec; /* prec field, or -1 for all digits */
3250 int showbase; /* choices above */
3251 int showpoint; /* if radix point always shown */
3252 int showtrailing; /* if trailing zeros wanted */
3253 char sign; /* '+', ' ', or '\0' */
  int width; /* width field */
};
3257 #if _GMP_H_HAVE_VA_LIST
3259 typedef int (*doprnt_format_t) _PROTO ((void *data, const char *fmt, va_list ap));
3260 typedef int (*doprnt_memory_t) _PROTO ((void *data, const char *str, size_t len));
3261 typedef int (*doprnt_reps_t) _PROTO ((void *data, int c, int reps));
3262 typedef int (*doprnt_final_t) _PROTO ((void *data));
struct doprnt_funs_t {
  doprnt_format_t format;
  doprnt_memory_t memory;
  doprnt_reps_t reps;
  doprnt_final_t final; /* NULL if not required */
};
3271 extern const struct doprnt_funs_t __gmp_fprintf_funs;
3272 extern const struct doprnt_funs_t __gmp_sprintf_funs;
3273 extern const struct doprnt_funs_t __gmp_snprintf_funs;
3274 extern const struct doprnt_funs_t __gmp_obstack_printf_funs;
3275 extern const struct doprnt_funs_t __gmp_ostream_funs;
3277 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first
3278 "size" of these have been written. "alloc > size" is maintained, so
3279 there's room to store a '\0' at the end. "result" is where the
3280 application wants the final block pointer. */
struct gmp_asprintf_t {
  char **result;
  char *buf;
  size_t size;
  size_t alloc;
};

#define GMP_ASPRINTF_T_INIT(d, output) \
  do { \
    (d).result = (output); \
    (d).alloc = 256; \
    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
    (d).size = 0; \
  } while (0)
3296 /* If a realloc is necessary, use twice the size actually required, so as to
3297 avoid repeated small reallocs. */
#define GMP_ASPRINTF_T_NEED(d, n) \
  do { \
    size_t alloc, newsize, newalloc; \
    ASSERT ((d)->alloc >= (d)->size + 1); \
    alloc = (d)->alloc; \
    newsize = (d)->size + (n); \
    if (alloc <= newsize) \
      { \
        newalloc = 2*newsize; \
        (d)->alloc = newalloc; \
        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
                                               alloc, newalloc, char); \
      } \
  } while (0)
3314 __GMP_DECLSPEC int __gmp_asprintf_memory _PROTO ((struct gmp_asprintf_t *d, const char *str, size_t len));
3315 __GMP_DECLSPEC int __gmp_asprintf_reps _PROTO ((struct gmp_asprintf_t *d, int c, int reps));
3316 __GMP_DECLSPEC int __gmp_asprintf_final _PROTO ((struct gmp_asprintf_t *d));
3318 /* buf is where to write the next output, and size is how much space is left
3319 there. If the application passed size==0 then that's what we'll have
3320 here, and nothing at all should be written. */
struct gmp_snprintf_t {
  char *buf;
  size_t size;
};
/* Add the bytes printed by the call to the total retval, or bail out on an
   error. */
#define DOPRNT_ACCUMULATE(call) \
  do { \
    int __ret; \
    __ret = call; \
    if (__ret == -1) \
      goto error; \
    retval += __ret; \
  } while (0)
#define DOPRNT_ACCUMULATE_FUN(fun, params) \
  do { \
    ASSERT ((fun) != NULL); \
    DOPRNT_ACCUMULATE ((*(fun)) params); \
  } while (0)
3342 #define DOPRNT_FORMAT(fmt, ap) \
3343 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
3344 #define DOPRNT_MEMORY(ptr, len) \
3345 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
3346 #define DOPRNT_REPS(c, n) \
3347 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
3349 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
#define DOPRNT_REPS_MAYBE(c, n) \
  do { \
    if ((n) != 0) \
      DOPRNT_REPS (c, n); \
  } while (0)
#define DOPRNT_MEMORY_MAYBE(ptr, len) \
  do { \
    if ((len) != 0) \
      DOPRNT_MEMORY (ptr, len); \
  } while (0)
3362 __GMP_DECLSPEC int __gmp_doprnt _PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
3363 __GMP_DECLSPEC int __gmp_doprnt_integer _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
3365 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
3366 __GMP_DECLSPEC int __gmp_doprnt_mpf _PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
3368 int __gmp_replacement_vsnprintf _PROTO ((char *, size_t, const char *, va_list));
3369 #endif /* _GMP_H_HAVE_VA_LIST */
3372 typedef int (*gmp_doscan_scan_t) _PROTO ((void *, const char *, ...));
3373 typedef void *(*gmp_doscan_step_t) _PROTO ((void *, int));
3374 typedef int (*gmp_doscan_get_t) _PROTO ((void *));
3375 typedef int (*gmp_doscan_unget_t) _PROTO ((int, void *));
3377 struct gmp_doscan_funs_t {
3378 gmp_doscan_scan_t scan;
3379 gmp_doscan_step_t step;
3380 gmp_doscan_get_t get;
  gmp_doscan_unget_t unget;
};
3383 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs;
3384 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
3386 #if _GMP_H_HAVE_VA_LIST
int __gmp_doscan _PROTO ((const struct gmp_doscan_funs_t *, void *,
                          const char *, va_list));
#endif
3392 /* For testing and debugging. */
#define MPZ_CHECK_FORMAT(z) \
  do { \
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
  } while (0)

#define MPQ_CHECK_FORMAT(q) \
  do { \
    MPZ_CHECK_FORMAT (mpq_numref (q)); \
    MPZ_CHECK_FORMAT (mpq_denref (q)); \
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
    if (SIZ(mpq_numref(q)) == 0) \
      { \
        /* should have zero as 0/1 */ \
        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
                       && PTR(mpq_denref(q))[0] == 1); \
      } \
    else \
      { \
        /* should have no common factors */ \
        mpz_t g; \
        mpz_init (g); \
        mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
        mpz_clear (g); \
      } \
  } while (0)

#define MPF_CHECK_FORMAT(f) \
  do { \
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
    if (SIZ(f) == 0) \
      ASSERT_ALWAYS (EXP(f) == 0); \
    if (SIZ(f) != 0) \
      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
  } while (0)
3434 #define MPZ_PROVOKE_REALLOC(z) \
3435 do { ALLOC(z) = ABSIZ(z); } while (0)
3438 /* Enhancement: The "mod" and "gcd_1" functions below could have
3439 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
3440 function pointers, only actual functions. It probably doesn't make much
3441 difference to the gmp code, since hopefully we arrange calls so there's
3442 no great need for the compiler to move things around. */
3444 #if WANT_FAT_BINARY && HAVE_HOST_CPU_FAMILY_x86
3445 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
   in mpn/x86/x86-defs.m4.  Be sure to update that when changing here. */
struct cpuvec_t {
3448 DECL_add_n ((*add_n));
3449 DECL_addmul_1 ((*addmul_1));
3450 DECL_copyd ((*copyd));
3451 DECL_copyi ((*copyi));
3452 DECL_divexact_1 ((*divexact_1));
3453 DECL_divexact_by3c ((*divexact_by3c));
3454 DECL_divrem_1 ((*divrem_1));
3455 DECL_gcd_1 ((*gcd_1));
3456 DECL_lshift ((*lshift));
3457 DECL_mod_1 ((*mod_1));
3458 DECL_mod_34lsub1 ((*mod_34lsub1));
3459 DECL_modexact_1c_odd ((*modexact_1c_odd));
3460 DECL_mul_1 ((*mul_1));
3461 DECL_mul_basecase ((*mul_basecase));
3462 DECL_preinv_divrem_1 ((*preinv_divrem_1));
3463 DECL_preinv_mod_1 ((*preinv_mod_1));
3464 DECL_rshift ((*rshift));
3465 DECL_sqr_basecase ((*sqr_basecase));
3466 DECL_sub_n ((*sub_n));
  DECL_submul_1 ((*submul_1));
  int initialized;
3469 mp_size_t mul_karatsuba_threshold;
3470 mp_size_t mul_toom3_threshold;
3471 mp_size_t sqr_karatsuba_threshold;
  mp_size_t sqr_toom3_threshold;
};
3474 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
3475 #endif /* x86 fat binary */
3477 void __gmpn_cpuvec_init __GMP_PROTO ((void));
3479 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
3480 if that hasn't yet been done (to establish the right values). */
3481 #define CPUVEC_THRESHOLD(field) \
3482 ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
3483 __gmpn_cpuvec.field)
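/* Example (illustrative): in a fat build, generic code compares sizes
   against the runtime-selected threshold on the pattern

       if (BELOW_THRESHOLD (n, CPUVEC_THRESHOLD (mul_karatsuba_threshold)))
         ... use the basecase ...

   where the first evaluation triggers the one-time __gmpn_cpuvec_init. */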
3487 #if TUNE_PROGRAM_BUILD
3488 /* Some extras wanted when recompiling some .c files for use by the tune
3489 program. Not part of a normal build.
3491 It's necessary to keep these thresholds as #defines (just to an
3492 identically named variable), since various defaults are established based
3493 on #ifdef in the .c files. For some this is not so (the defaults are
   instead established above), but all are done this way for consistency. */
3496 #undef MUL_KARATSUBA_THRESHOLD
3497 #define MUL_KARATSUBA_THRESHOLD mul_karatsuba_threshold
3498 extern mp_size_t mul_karatsuba_threshold;
3500 #undef MUL_TOOM3_THRESHOLD
3501 #define MUL_TOOM3_THRESHOLD mul_toom3_threshold
3502 extern mp_size_t mul_toom3_threshold;
3504 #undef MUL_FFT_THRESHOLD
3505 #define MUL_FFT_THRESHOLD mul_fft_threshold
3506 extern mp_size_t mul_fft_threshold;
3508 #undef MUL_FFT_MODF_THRESHOLD
3509 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
3510 extern mp_size_t mul_fft_modf_threshold;
3512 #undef MUL_FFT_TABLE
3513 #define MUL_FFT_TABLE { 0 }
3515 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
3516 remain as zero (always use it). */
3517 #if ! HAVE_NATIVE_mpn_sqr_basecase
3518 #undef SQR_BASECASE_THRESHOLD
3519 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
3520 extern mp_size_t sqr_basecase_threshold;
3523 #if TUNE_PROGRAM_BUILD_SQR
3524 #undef SQR_KARATSUBA_THRESHOLD
3525 #define SQR_KARATSUBA_THRESHOLD SQR_KARATSUBA_MAX_GENERIC
3527 #undef SQR_KARATSUBA_THRESHOLD
3528 #define SQR_KARATSUBA_THRESHOLD sqr_karatsuba_threshold
3529 extern mp_size_t sqr_karatsuba_threshold;
3532 #undef SQR_TOOM3_THRESHOLD
3533 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
3534 extern mp_size_t sqr_toom3_threshold;
3536 #undef SQR_FFT_THRESHOLD
3537 #define SQR_FFT_THRESHOLD sqr_fft_threshold
3538 extern mp_size_t sqr_fft_threshold;
3540 #undef SQR_FFT_MODF_THRESHOLD
3541 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
3542 extern mp_size_t sqr_fft_modf_threshold;
3544 #undef SQR_FFT_TABLE
3545 #define SQR_FFT_TABLE { 0 }
3547 #undef MULLOW_BASECASE_THRESHOLD
3548 #define MULLOW_BASECASE_THRESHOLD mullow_basecase_threshold
3549 extern mp_size_t mullow_basecase_threshold;
3551 #undef MULLOW_DC_THRESHOLD
3552 #define MULLOW_DC_THRESHOLD mullow_dc_threshold
3553 extern mp_size_t mullow_dc_threshold;
3555 #undef MULLOW_MUL_N_THRESHOLD
3556 #define MULLOW_MUL_N_THRESHOLD mullow_mul_n_threshold
3557 extern mp_size_t mullow_mul_n_threshold;
3560 #if ! UDIV_PREINV_ALWAYS
3561 #undef DIV_SB_PREINV_THRESHOLD
3562 #define DIV_SB_PREINV_THRESHOLD div_sb_preinv_threshold
3563 extern mp_size_t div_sb_preinv_threshold;
3566 #undef DIV_DC_THRESHOLD
3567 #define DIV_DC_THRESHOLD div_dc_threshold
3568 extern mp_size_t div_dc_threshold;
3570 #undef POWM_THRESHOLD
3571 #define POWM_THRESHOLD powm_threshold
3572 extern mp_size_t powm_threshold;
3574 #undef GCD_ACCEL_THRESHOLD
3575 #define GCD_ACCEL_THRESHOLD gcd_accel_threshold
3576 extern mp_size_t gcd_accel_threshold;
3578 #undef GCDEXT_THRESHOLD
3579 #define GCDEXT_THRESHOLD gcdext_threshold
3580 extern mp_size_t gcdext_threshold;
3582 #undef DIVREM_1_NORM_THRESHOLD
3583 #define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
3584 extern mp_size_t divrem_1_norm_threshold;
3586 #undef DIVREM_1_UNNORM_THRESHOLD
3587 #define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
3588 extern mp_size_t divrem_1_unnorm_threshold;
3590 #undef MOD_1_NORM_THRESHOLD
3591 #define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
3592 extern mp_size_t mod_1_norm_threshold;
3594 #undef MOD_1_UNNORM_THRESHOLD
3595 #define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
3596 extern mp_size_t mod_1_unnorm_threshold;
3598 #if ! UDIV_PREINV_ALWAYS
3599 #undef DIVREM_2_THRESHOLD
3600 #define DIVREM_2_THRESHOLD divrem_2_threshold
3601 extern mp_size_t divrem_2_threshold;
3604 #undef GET_STR_DC_THRESHOLD
3605 #define GET_STR_DC_THRESHOLD get_str_dc_threshold
3606 extern mp_size_t get_str_dc_threshold;
3608 #undef GET_STR_PRECOMPUTE_THRESHOLD
3609 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
3610 extern mp_size_t get_str_precompute_threshold;
3612 #undef SET_STR_THRESHOLD
3613 #define SET_STR_THRESHOLD set_str_threshold
extern mp_size_t set_str_threshold;
3616 #undef FFT_TABLE_ATTRS
3617 #define FFT_TABLE_ATTRS
3618 extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
3620 /* Sizes the tune program tests up to, used in a couple of recompilations. */
3621 #undef MUL_KARATSUBA_THRESHOLD_LIMIT
3622 #undef MUL_TOOM3_THRESHOLD_LIMIT
3623 #undef MULLOW_BASECASE_THRESHOLD_LIMIT
3624 #undef SQR_TOOM3_THRESHOLD_LIMIT
3625 #define SQR_KARATSUBA_MAX_GENERIC 200
3626 #define MUL_KARATSUBA_THRESHOLD_LIMIT 700
3627 #define MUL_TOOM3_THRESHOLD_LIMIT 700
3628 #define MULLOW_BASECASE_THRESHOLD_LIMIT 200
3629 #define SQR_TOOM3_THRESHOLD_LIMIT 400
3630 #define GET_STR_THRESHOLD_LIMIT 150
3632 /* "thresh" will normally be a variable when tuning, so use the cached
3633 result. This helps mpn_sb_divrem_mn for instance. */
3634 #undef CACHED_ABOVE_THRESHOLD
3635 #define CACHED_ABOVE_THRESHOLD(cache, thresh) (cache)
3636 #undef CACHED_BELOW_THRESHOLD
3637 #define CACHED_BELOW_THRESHOLD(cache, thresh) (cache)
3639 #endif /* TUNE_PROGRAM_BUILD */
3641 #if defined (__cplusplus)
3648 /* A little helper for a null-terminated __gmp_allocate_func string.
3649 The destructor ensures it's freed even if an exception is thrown.
3650 The len field is needed by the destructor, and can be used by anyone else
3651 to avoid a second strlen pass over the data.
3653 Since our input is a C string, using strlen is correct. Perhaps it'd be
3654 more C++-ish style to use std::char_traits<char>::length, but char_traits
3655 isn't available in gcc 2.95.4. */
class gmp_allocated_string {
 public:
  char *str;
  size_t len;
  gmp_allocated_string(char *arg)
  {
    str = arg;
    len = std::strlen (str);
  }
  ~gmp_allocated_string()
  {
    (*__gmp_free_func) (str, len+1);
  }
};
3672 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
3673 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
3674 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
3675 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
3676 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
3677 extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
3679 #endif /* __cplusplus */
3681 #endif /* __GMP_IMPL_H__ */