/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it.  */
#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h>  /* for _popcnt */
#endif
/* limits.h is not used in general, since it's an ANSI-ism, and since on
   solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values).

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort.  */

#if defined _CRAY
#include <limits.h>
#endif
/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmp_cpuvec,
   they're not called directly.  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_gcd_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)
#if ! __GMP_WITHIN_CONFIGURE
#include "config.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif
#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif
#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif
/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
   be setup appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.  */
#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif
174 /* if not provided by gmp-mparam.h */
175 #ifndef BYTES_PER_MP_LIMB
176 #define BYTES_PER_MP_LIMB SIZEOF_MP_LIMB_T
178 #define GMP_LIMB_BYTES BYTES_PER_MP_LIMB
179 #ifndef GMP_LIMB_BITS
180 #define GMP_LIMB_BITS (8 * SIZEOF_MP_LIMB_T)
183 #define BITS_PER_ULONG (8 * SIZEOF_UNSIGNED_LONG)
186 /* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
187 #if HAVE_UINT_LEAST32_T
188 typedef uint_least32_t gmp_uint_least32_t;
190 #if SIZEOF_UNSIGNED_SHORT >= 4
191 typedef unsigned short gmp_uint_least32_t;
193 #if SIZEOF_UNSIGNED >= 4
194 typedef unsigned gmp_uint_least32_t;
196 typedef unsigned long gmp_uint_least32_t;
/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif
/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
/* const and signed must match __gmp_const and __gmp_signed, so follow the
   decision made for those in gmp.h.  */
#if ! __GMP_HAVE_CONST
#define const   /* empty */
#define signed  /* empty */
#endif
222 /* "const" basically means a function does nothing but examine its arguments
223 and give a return value, it doesn't read or write any memory (neither
224 global nor pointed to by arguments), and has no other side-effects. This
225 is more restrictive than "pure". See info node "(gcc)Function
226 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
227 this off when trying to write timing loops. */
228 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
229 #define ATTRIBUTE_CONST __attribute__ ((const))
231 #define ATTRIBUTE_CONST
#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif
240 /* "malloc" means a function behaves like malloc in that the pointer it
241 returns doesn't alias anything. */
242 #if HAVE_ATTRIBUTE_MALLOC
243 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
245 #define ATTRIBUTE_MALLOC
#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif
#if ! HAVE_MEMSET
#define memset(p, c, n)                 \
  do {                                  \
    ASSERT ((n) >= 0);                  \
    char  *__memset__p = (p);           \
    int   __i;                          \
    for (__i = 0; __i < (n); __i++)     \
      __memset__p[__i] = (c);           \
  } while (0)
#endif
/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif
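/* Example (an illustrative sketch, not part of the library): a varargs
   routine that must traverse its argument list twice, which is exactly the
   situation va_copy exists for.  */
#if 0
#include <stdarg.h>
static int
example_sum_twice (int count, ...)
{
  va_list  ap, ap2;
  int      i, total = 0;
  va_start (ap, count);
  va_copy (ap2, ap);    /* the second traversal needs its own va_list */
  for (i = 0; i < count; i++)
    total += va_arg (ap, int);
  for (i = 0; i < count; i++)
    total += va_arg (ap2, int);
  va_end (ap2);
  va_end (ap);
  return total;
}
#endif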
/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).  */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif
#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */
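/* Example (an illustrative sketch): the usage pattern above in a function
   wanting n temporary limbs; TMP_ALLOC_LIMBS is defined further down in
   this file.  */
#if 0
static void
example_tmp_user (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_ptr  tp;
  TMP_DECL;                  /* might be empty, so last among declarations */
  TMP_MARK;                  /* must precede any TMP_ALLOC */
  tp = TMP_ALLOC_LIMBS (n);  /* n must be non-zero */
  MPN_COPY (tp, up, n);      /* ... work in {tp,n} ... */
  MPN_COPY (rp, tp, n);
  TMP_FREE;                  /* releases everything since TMP_MARK */
}
#endif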
/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union.  */
union tmp_align_t {
  mp_limb_t  l;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)
314 /* Return "a" rounded upwards to a multiple of "m", if it isn't already.
315 "a" must be an unsigned type.
316 This is designed for use with a compile-time constant "m".
317 The POW2 case is expected to be usual, and gcc 3.0 and up recognises
318 "(-(8*n))%8" or the like is always zero, which means the rounding up in
319 the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
320 #define ROUND_UP_MULTIPLE(a,m) \
321 (POW2_P(m) ? (a) + (-(a))%(m) \
322 : (a)+(m)-1 - (((a)+(m)-1) % (m)))
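/* For instance, ROUND_UP_MULTIPLE(13,8) goes through the POW2 branch as
   13 + (-13)%8 = 13 + 3 = 16 (unsigned arithmetic makes (-13)%8 well
   defined), and ROUND_UP_MULTIPLE(13,6) through the general branch as
   13+6-1 - ((13+6-1) % 6) = 18 - 0 = 18.  */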
#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
#endif
#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL           struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK           __tmp_marker = 0
#define TMP_SALLOC(n)      alloca(n)
#define TMP_BALLOC(n)      __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_ALLOC(n)                                                    \
  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                                           \
  do {                                                                     \
    if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif
#if WANT_TMP_REENTRANT
#define TMP_SDECL          TMP_DECL
#define TMP_DECL           struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK          TMP_MARK
#define TMP_MARK           __tmp_marker = 0
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(n)       __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE          TMP_FREE
#define TMP_FREE           __gmp_tmp_reentrant_free (__tmp_marker)
#endif
#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
__GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
#define TMP_SDECL          TMP_DECL
#define TMP_DECL           struct tmp_marker __tmp_marker
#define TMP_SMARK          TMP_MARK
#define TMP_MARK           __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(n)                                                    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE          TMP_FREE
#define TMP_FREE           __gmp_tmp_free (&__tmp_marker)
#endif
#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  void                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark  __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
                                                         struct tmp_debug_t *,
                                                         const char *, const char *));
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
                                                         struct tmp_debug_t **, const char *,
                                                         size_t)) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free  __GMP_PROTO ((const char *, int, int,
                                                         struct tmp_debug_t **,
                                                         const char *, const char *));
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int marker;                                                   \
  int __tmp_marker_inscope;                                     \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_SALLOC(n)      TMP_ALLOC(n)
#define TMP_BALLOC(n)      TMP_ALLOC(n)
#define TMP_ALLOC(size)                                         \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,              \
                         __tmp_marker_inscope,                  \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif /* WANT_TMP_DEBUG */
/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)   ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type)  ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type)  ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)       TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)      TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)      TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)     TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)    TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)    TMP_BALLOC_TYPE(n,mp_ptr)
/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)
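/* Example (an illustrative fragment): scratch for a 2*n limb product and
   an n+1 limb shifted copy obtained in one allocation.  */
#if 0
  mp_ptr  prodp, shiftp;
  TMP_ALLOC_LIMBS_2 (prodp, 2 * n,
                     shiftp, n + 1);
#endif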
/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))
/* Field access macros.  */
#define SIZ(x)    ((x)->_mp_size)
#define ABSIZ(x)  ABS (SIZ (x))
#define PTR(x)    ((x)->_mp_d)
#define LIMBS(x)  ((x)->_mp_d)
#define EXP(x)    ((x)->_mp_exp)
#define PREC(x)   ((x)->_mp_prec)
#define ALLOC(x)  ((x)->_mp_alloc)
/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
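/* For example, n=8 is 1000 binary and n-1=0111, so 8&7 is 0 and POW2_P(8)
   is 1; n=12 is 1100 and n-1=1011, so 12&11 is 1000, non-zero, and
   POW2_P(12) is 0.  POW2_P(0) is 1, per the caveat above.  */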
502 /* The "short" defines are a bit different because shorts are promoted to
505 #ifndef's are used since on some systems (HP?) header files other than
506 limits.h setup these defines. We could forcibly #undef in that case, but
507 there seems no need to worry about that. */
510 #define ULONG_MAX __GMP_ULONG_MAX
513 #define UINT_MAX __GMP_UINT_MAX
516 #define USHRT_MAX __GMP_USHRT_MAX
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
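/* For instance with 32-bit limbs and no nails, MP_LIMB_T_MAX is 0xFFFFFFFF,
   MP_LIMB_T_MAX >> 1 is 0x7FFFFFFF, and the xor leaves GMP_LIMB_HIGHBIT =
   0x80000000, the top bit alone.  */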
#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif
#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN
#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif
#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD    MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1        0              /* no preinv */
#define DIVREM_2_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD          MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD          MP_SIZE_T_MAX
#endif
#define MP_LIMB_T_SWAP(x, y)                            \
  do {                                                  \
    mp_limb_t __mp_limb_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_limb_t_swap__tmp;                        \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                            \
  do {                                                  \
    mp_size_t __mp_size_t_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_size_t_swap__tmp;                        \
  } while (0)

#define MP_PTR_SWAP(x, y)                               \
  do {                                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);                    \
    (x) = (y);                                          \
    (y) = __mp_ptr_swap__tmp;                           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                            \
  do {                                                  \
    mp_srcptr __mp_srcptr_swap__tmp = (x);              \
    (x) = (y);                                          \
    (y) = __mp_srcptr_swap__tmp;                        \
  } while (0)
#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
  do {                                  \
    MP_PTR_SWAP (xp, yp);               \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
  do {                                  \
    MP_SRCPTR_SWAP (xp, yp);            \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while (0)
#define MPZ_PTR_SWAP(x, y)                              \
  do {                                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);                  \
    (x) = (y);                                          \
    (y) = __mpz_ptr_swap__tmp;                          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                           \
  do {                                                  \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);            \
    (x) = (y);                                          \
    (y) = __mpz_srcptr_swap__tmp;                       \
  } while (0)
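/* Example (an illustrative fragment): the common pattern of ordering two
   operands by size before a multiply-style routine.  */
#if 0
  if (usize < vsize)
    MPN_SRCPTR_SWAP (up,usize, vp,vsize);   /* now usize >= vsize */
#endif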
/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC extern void   (*__gmp_free_func) __GMP_PROTO ((void *, size_t));

__GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
__GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
__GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
  ((type *) (*__gmp_reallocate_func)                            \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
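/* Example (an illustrative fragment): a heap limb buffer through the
   user-settable memory functions, freed with the matching size.  */
#if 0
  mp_ptr  tp = __GMP_ALLOCATE_FUNC_LIMBS (n);
  /* ... use {tp,n} ... */
  __GMP_FREE_FUNC_LIMBS (tp, n);   /* size must match the allocation */
#endif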
#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)
/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif
/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */
#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif
/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif
/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name
#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif
__GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
   returns the carry out (0, 1 or 2).  */
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
   returns the carry out (0, ..., 4).  */
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
   returns the carry out (0, ..., 2^k).  */
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
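/* A generic equivalent (an illustrative sketch, not the optimized native
   code): mpn_addlsh1_n can be built from mpn_lshift and mpn_add_n,
   accumulating the two carries.  */
#if 0
static mp_limb_t
example_addlsh1_n (mp_ptr cp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_limb_t  cy;
  cy = mpn_lshift (cp, bp, n, 1);    /* cp = 2*{bp,n}, shifted-out bit 0 or 1 */
  cy += mpn_add_n (cp, cp, ap, n);   /* cp += {ap,n}, total carry 0..2 */
  return cy;
}
#endif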
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
   returns the borrow out (0, 1 or 2).  */
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
   returns the carry out (-1, 0, 1).  */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
   returns the borrow out (FIXME 0, 1, 2 or 3).  */
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
   returns the carry out (-1, ..., 3).  */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
   returns the carry out (-1, 0, ..., 2^k-1).  */
#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  */
#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
   and returns the bit rshifted out (0 or 1).  If there's a borrow from the
   subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
   complement negative.  */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
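/* A generic equivalent (an illustrative sketch): mpn_rsh1add_n is an add
   followed by a one bit right shift, the add's carry becoming the new high
   bit.  */
#if 0
static mp_limb_t
example_rsh1add_n (mp_ptr cp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_limb_t  cy, out;
  cy = mpn_add_n (cp, ap, bp, n);            /* carry is 0 or 1 */
  out = mpn_rshift (cp, cp, n, 1);           /* shifted-out bit, at the high end */
  cp[n - 1] |= cy << (GMP_NUMB_BITS - 1);    /* carry becomes the top bit */
  return out != 0;
}
#endif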
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
/* Remap names of internal mpn functions. */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv         __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#endif
#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));

#define mpn_sqr __MPN(sqr)
__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));

#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;

#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;
#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}
#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}
typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure.  */
typedef struct {
  void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
  void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
  void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
  void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
} gmp_randfnptr_t;
/* Macro to obtain a void pointer to the function pointers structure. */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
/* Write a given number of random bits to rp.  */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr  __rstate = (state);                      \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
      (__rstate, rp, bits);                                     \
  } while (0)
__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */
__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                       \
  ((__gmp_rands_initialized ? 0                     \
    : (__gmp_rands_initialized = 1,                 \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
   __gmp_rands)
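/* Example (an illustrative fragment): how internal code draws random bits
   through RANDS and _gmp_rand.  */
#if 0
  mp_limb_t  buf[2];
  _gmp_rand (buf, RANDS, 2 * GMP_NUMB_BITS);   /* fill both limbs */
#endif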
/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)
/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
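/* Example (an illustrative sketch): the typical dispatch written with
   these macros; with a compile-time constant threshold the dead branch
   drops out.  MUL_TOOM22_THRESHOLD comes from gmp-mparam.h.  */
#if 0
  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    mpn_mul_basecase (prodp, up, n, vp, n);
  else
    {
      /* ... Toom-22 or larger ... */
    }
#endif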
#define MPN_TOOM22_MUL_MINSIZE    4
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   49 /* ??? */
#define MPN_TOOM53_MUL_MINSIZE   49 /* ??? */
#define MPN_TOOM63_MUL_MINSIZE   49
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));

#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));

#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));

#define mpn_mul_fft_full __MPN(mul_fft_full)
__GMP_DECLSPEC void mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
__GMP_DECLSPEC void mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));

#define mpn_fft_next_size __MPN(fft_next_size)
__GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));

#define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));

#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
#define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));

#define mpn_mu_div_qr __MPN(mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
#define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_mu_divappr_q __MPN(mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
#define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));

#define mpn_mu_div_q __MPN(mu_div_q)
__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));

#define mpn_div_q __MPN(div_q)
__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invert_itch(n)  mpn_invertappr_itch(n)

#define mpn_ni_invertappr __MPN(ni_invertappr)
__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr __MPN(invertappr)
__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_invertappr_itch(n)  (3 * (n) + 2)

#define mpn_binvert __MPN(binvert)
__GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_binvert_itch __MPN(binvert_itch)
__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));

#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));

#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
__GMP_DECLSPEC void mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));

#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));

#define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
#define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
__GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));

#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
__GMP_DECLSPEC void mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_qr __MPN(bdiv_qr)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_bdiv_q __MPN(bdiv_q)
__GMP_DECLSPEC void mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));

#define mpn_divexact __MPN(divexact)
__GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
#define mpn_divexact_itch __MPN(divexact_itch)
__GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
#define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
#define mpn_bdiv_dbm1(dst, src, size, divisor) \
  mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
#define mpn_powm __MPN(powm)
__GMP_DECLSPEC void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powlo __MPN(powlo)
__GMP_DECLSPEC void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
#define mpn_powm_sec __MPN(powm_sec)
__GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
#define mpn_powm_sec_itch __MPN(powm_sec_itch)
__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
#define mpn_subcnd_n __MPN(subcnd_n)
__GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
#define mpn_tabselect __MPN(tabselect)
__GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
#define mpn_redc_1_sec __MPN(redc_1_sec)
__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#ifndef DIVEXACT_BY3_METHOD
#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
#define DIVEXACT_BY3_METHOD 0  /* default to using mpn_bdiv_dbm1c */
#else
#define DIVEXACT_BY3_METHOD 1
#endif
#endif
#if DIVEXACT_BY3_METHOD == 0
#undef mpn_divexact_by3
#define mpn_divexact_by3(dst,src,size) \
  (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
/* override mpn_divexact_by3c defined in gmp.h */
#undef mpn_divexact_by3c
#define mpn_divexact_by3c(dst,src,size,cy) \
  (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * cy))
#endif
#if GMP_NUMB_BITS % 4 == 0
#define mpn_divexact_by5(dst,src,size) \
  (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
#endif

#if GMP_NUMB_BITS % 6 == 0
#define mpn_divexact_by7(dst,src,size) \
  (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
#endif

#if GMP_NUMB_BITS % 6 == 0
#define mpn_divexact_by9(dst,src,size) \
  (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
#endif

#if GMP_NUMB_BITS % 10 == 0
#define mpn_divexact_by11(dst,src,size) \
  (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
#endif

#if GMP_NUMB_BITS % 12 == 0
#define mpn_divexact_by13(dst,src,size) \
  (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
#endif

#if GMP_NUMB_BITS % 4 == 0
#define mpn_divexact_by15(dst,src,size) \
  (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
#endif
#define mpz_divexact_gcd __gmpz_divexact_gcd
__GMP_DECLSPEC void mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));

#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
__GMP_DECLSPEC size_t mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
#endif
#define mpn_divisible_p __MPN(divisible_p)
__GMP_DECLSPEC int mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;

#define mpn_rootrem __MPN(rootrem)
__GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n)                              \
  do {                                                          \
    int __i;  /* Faster on some Crays with plain int */         \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = 0; __i < (n); __i++)                             \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif
/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
#define mpn_copyi __MPN(copyi)
__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
#define MPN_COPY_INCR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));       \
    mpn_copyi (dst, src, size);                         \
  } while (0)
#endif
/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
#if ! defined (MPN_COPY_INCR)
#define MPN_COPY_INCR(dst, src, n)                      \
  do {                                                  \
    ASSERT ((n) >= 0);                                  \
    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));          \
    if ((n) != 0)                                       \
      {                                                 \
        mp_size_t __n = (n) - 1;                        \
        mp_ptr __dst = (dst);                           \
        mp_srcptr __src = (src);                        \
        mp_limb_t __x;                                  \
        __x = *__src++;                                 \
        if (__n != 0)                                   \
          {                                             \
            do                                          \
              {                                         \
                *__dst++ = __x;                         \
                __x = *__src++;                         \
              }                                         \
            while (--__n);                              \
          }                                             \
        *__dst++ = __x;                                 \
      }                                                 \
  } while (0)
#endif
#if defined (_CRAY)
#define MPN_COPY_DECR(dst, src, n)                              \
  do {                                                          \
    int __i;  /* Faster on some Crays with plain int */         \
    _Pragma ("_CRI ivdep");                                     \
    for (__i = (n) - 1; __i >= 0; __i--)                        \
      (dst)[__i] = (src)[__i];                                  \
  } while (0)
#endif
/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
#define mpn_copyd __MPN(copyd)
__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
#endif
#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
#define MPN_COPY_DECR(dst, src, size)                   \
  do {                                                  \
    ASSERT ((size) >= 0);                               \
    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));       \
    mpn_copyd (dst, src, size);                         \
  } while (0)
#endif
1457 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1458 #if ! defined (MPN_COPY_DECR)
1459 #define MPN_COPY_DECR(dst, src, n) \
1461 ASSERT ((n) >= 0); \
1462 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
1465 mp_size_t __n = (n) - 1; \
1466 mp_ptr __dst = (dst) + __n; \
1467 mp_srcptr __src = (src) + __n; \
1486 #define MPN_COPY(d,s,n) \
1488 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
1489 MPN_COPY_INCR (d, s, n); \
1494 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1495 #define MPN_REVERSE(dst, src, size) \
1497 mp_ptr __dst = (dst); \
1498 mp_size_t __size = (size); \
1499 mp_srcptr __src = (src) + __size - 1; \
1501 ASSERT ((size) >= 0); \
1502 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
1503 CRAY_Pragma ("_CRI ivdep"); \
1504 for (__i = 0; __i < __size; __i++) \
1513 /* Zero n limbs at dst.
1515 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
1516 ppc630 for instance this is optimal since it can sustain only 1 store per
1517 cycle.
1519 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1520 "for" loop in the generic code below can become stu/bdnz. The do/while
1521 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1522 applies here as to __GMPN_COPY_INCR in gmp.h.
1524 xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1525 this loop too.
1527 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1528 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1529 trouble than it's worth to do the same, though perhaps a call to memset
1530 would be good when on a GNU system. */
1532 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1533 #define MPN_ZERO(dst, n) \
1535 ASSERT ((n) >= 0); \
1538 mp_ptr __dst = (dst) - 1; \
1539 mp_size_t __n = (n); \
1548 #define MPN_ZERO(dst, n) \
1550 ASSERT ((n) >= 0); \
1553 mp_ptr __dst = (dst); \
1554 mp_size_t __n = (n); \
1563 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1564 start up and would need to strip a lot of zeros before it'd be faster
1565 than a simple cmpl loop. Here are some times in cycles for
1566 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1575 #ifndef MPN_NORMALIZE
1576 #define MPN_NORMALIZE(DST, NLIMBS) \
1578 while ((NLIMBS) > 0) \
1580 if ((DST)[(NLIMBS) - 1] != 0) \
1586 #ifndef MPN_NORMALIZE_NOT_ZERO
1587 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
1589 ASSERT ((NLIMBS) >= 1); \
1592 if ((DST)[(NLIMBS) - 1] != 0) \
1599 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1600 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1601 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1602 somewhere a non-zero limb. */
1603 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
1605 ASSERT ((size) >= 1); \
1606 ASSERT ((low) == (ptr)[0]); \
1608 while ((low) == 0) \
1611 ASSERT ((size) >= 1); \
1617 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1618 temporary variable; it will be automatically cleared out at function
1619 return. We use __x here to make it possible to accept both mpz_ptr and
1620 mpz_t arguments. */
1621 #define MPZ_TMP_INIT(X, NLIMBS) \
1623 mpz_ptr __x = (X); \
1624 ASSERT ((NLIMBS) >= 1); \
1625 __x->_mp_alloc = (NLIMBS); \
1626 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
1629 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
1630 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
1631 ? (mp_ptr) _mpz_realloc(z,n) \
1632 : PTR(z))
1634 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
1637 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1638 f1p.
1640 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1641 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
1642 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1644 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1645 without good floating point. There's +2 for rounding up, and a further
1646 +2 since at the last step x limbs are doubled into a 2x+1 limb region
1647 whereas the actual F[2k] value might be only 2x-1 limbs.
1649 Note that a division is done first, since on a 32-bit system it's at
1650 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
1651 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1652 whatever a multiply of two 190Mbyte numbers takes.)
1654 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1655 worked into the multiplier. */
1657 #define MPN_FIB2_SIZE(n) \
1658 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
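/* Worked instance (illustrative): with GMP_NUMB_BITS == 32 and n == 100,
   MPN_FIB2_SIZE(100) = 100/32*23/32 + 4 = 3*23/32 + 4 = 2 + 4 = 6 limbs,
   comfortably above the 100*0.6942419 ~= 69 bits ~= 3 limbs that F[100]
   itself occupies, leaving the +2+2 headroom described above. */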
1661 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
1662 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1664 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1665 F[n] + 2*F[n-1] fits in a limb. */
1667 __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
1668 #define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
1670 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
1671 typedef struct
1672 {
1673 unsigned long d; /* current index in s[] */
1674 unsigned long s0; /* number corresponding to s[0] */
1675 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */
1676 unsigned char s[SIEVESIZE + 1]; /* sieve table */
1677 } gmp_primesieve_t;
1679 #define gmp_init_primesieve __gmp_init_primesieve
1680 __GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
1682 #define gmp_nextprime __gmp_nextprime
1683 __GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
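/* A minimal usage sketch (illustrative, using only the declarations above):
   gmp_init_primesieve resets the sieve, and gmp_nextprime hands back
   successive primes on each call. */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <stdio.h>
void
print_some_primes (void)
{
  gmp_primesieve_t ps;
  int i;
  gmp_init_primesieve (&ps);
  for (i = 0; i < 10; i++)
    printf ("%lu\n", gmp_nextprime (&ps));
}
#endif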
1686 #ifndef MUL_TOOM22_THRESHOLD
1687 #define MUL_TOOM22_THRESHOLD 30
1690 #ifndef MUL_TOOM33_THRESHOLD
1691 #define MUL_TOOM33_THRESHOLD 100
1694 #ifndef MUL_TOOM44_THRESHOLD
1695 #define MUL_TOOM44_THRESHOLD 300
1698 #ifndef MUL_TOOM6H_THRESHOLD
1699 #define MUL_TOOM6H_THRESHOLD 350
1702 #ifndef SQR_TOOM6_THRESHOLD
1703 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
1706 #ifndef MUL_TOOM8H_THRESHOLD
1707 #define MUL_TOOM8H_THRESHOLD 450
1710 #ifndef SQR_TOOM8_THRESHOLD
1711 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
1714 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
1715 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
1718 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
1719 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
1722 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
1723 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100
1726 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
1727 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
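/* How these constants get consumed (an editor's sketch, not a real GMP
   routine; mpn_mul_basecase and mpn_toom22_mul are declared elsewhere in
   this file, and scratch must be sized per mpn_toom22_mul_itch): */
#if 0 /* standalone illustration */
static void
mul_n_sketch (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_ptr scratch)
{
  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    mpn_mul_basecase (rp, up, n, vp, n);         /* schoolbook below cutoff */
  else
    mpn_toom22_mul (rp, up, n, vp, n, scratch);  /* karatsuba-style above */
}
#endif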
1730 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
1731 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
1732 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
1733 separate hard limit will have been defined. Similarly for TOOM3. */
1734 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
1735 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD
1737 #ifndef MUL_TOOM33_THRESHOLD_LIMIT
1738 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD
1740 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
1741 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
1744 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
1745 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
1746 certainly always want it if there's a native assembler mpn_sqr_basecase.)
1748 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
1749 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
1750 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less
1751 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
1752 should be used, and that may be never. */
1754 #ifndef SQR_BASECASE_THRESHOLD
1755 #define SQR_BASECASE_THRESHOLD 0
1758 #ifndef SQR_TOOM2_THRESHOLD
1759 #define SQR_TOOM2_THRESHOLD 50
1762 #ifndef SQR_TOOM3_THRESHOLD
1763 #define SQR_TOOM3_THRESHOLD 120
1766 #ifndef SQR_TOOM4_THRESHOLD
1767 #define SQR_TOOM4_THRESHOLD 400
1770 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */
1771 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
1772 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
1775 #ifndef DC_DIV_QR_THRESHOLD
1776 #define DC_DIV_QR_THRESHOLD 50
1779 #ifndef DC_DIVAPPR_Q_THRESHOLD
1780 #define DC_DIVAPPR_Q_THRESHOLD 200
1783 #ifndef DC_BDIV_QR_THRESHOLD
1784 #define DC_BDIV_QR_THRESHOLD 50
1787 #ifndef DC_BDIV_Q_THRESHOLD
1788 #define DC_BDIV_Q_THRESHOLD 180
1791 #ifndef DIVEXACT_JEB_THRESHOLD
1792 #define DIVEXACT_JEB_THRESHOLD 25
1795 #ifndef INV_MULMOD_BNM1_THRESHOLD
1796 #define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD)
1799 #ifndef INV_APPR_THRESHOLD
1800 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD
1803 #ifndef INV_NEWTON_THRESHOLD
1804 #define INV_NEWTON_THRESHOLD 200
1807 #ifndef BINV_NEWTON_THRESHOLD
1808 #define BINV_NEWTON_THRESHOLD 300
1811 #ifndef MU_DIVAPPR_Q_THRESHOLD
1812 #define MU_DIVAPPR_Q_THRESHOLD 2000
1815 #ifndef MU_DIV_QR_THRESHOLD
1816 #define MU_DIV_QR_THRESHOLD 2000
1819 #ifndef MUPI_DIV_QR_THRESHOLD
1820 #define MUPI_DIV_QR_THRESHOLD 200
1823 #ifndef MU_BDIV_Q_THRESHOLD
1824 #define MU_BDIV_Q_THRESHOLD 2000
1827 #ifndef MU_BDIV_QR_THRESHOLD
1828 #define MU_BDIV_QR_THRESHOLD 2000
1831 #ifndef MULMOD_BNM1_THRESHOLD
1832 #define MULMOD_BNM1_THRESHOLD 16
1835 #ifndef SQRMOD_BNM1_THRESHOLD
1836 #define SQRMOD_BNM1_THRESHOLD 16
1839 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
1840 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2)
1843 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
1845 #ifndef REDC_1_TO_REDC_2_THRESHOLD
1846 #define REDC_1_TO_REDC_2_THRESHOLD 15
1848 #ifndef REDC_2_TO_REDC_N_THRESHOLD
1849 #define REDC_2_TO_REDC_N_THRESHOLD 100
1854 #ifndef REDC_1_TO_REDC_N_THRESHOLD
1855 #define REDC_1_TO_REDC_N_THRESHOLD 100
1858 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
1861 /* First k to use for an FFT modF multiply. A modF FFT is an order
1862 log(2^k)/log(2^(k-1)) = k/(k-1) algorithm, so k=3 is 1.5, barely better than
1863 karatsuba's log(3)/log(2) ~= 1.585, whereas k=4 is 1.33, beating toom3's log(5)/log(3) ~= 1.465. */
1864 #define FFT_FIRST_K 4
1866 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
1867 #ifndef MUL_FFT_MODF_THRESHOLD
1868 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3)
1870 #ifndef SQR_FFT_MODF_THRESHOLD
1871 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
1874 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
1875 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
1876 NxN->2N multiply and not recursing into itself is an order
1877 log(2^k)/log(2^(k-2)) = k/(k-2) algorithm, so it'll be at least k=7 at 1.4,
1878 which is the first better than toom3's 1.465. */
1879 #ifndef MUL_FFT_THRESHOLD
1880 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
1882 #ifndef SQR_FFT_THRESHOLD
1883 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
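/* Putting numbers to the orders quoted above (illustrative):
   modF FFT-k is k/(k-1): k=4 -> 1.33, k=5 -> 1.25, ...
   full NxN->2N FFT-k is k/(k-2): k=6 -> 1.5, k=7 -> 1.4
   against karatsuba at log(3)/log(2) ~= 1.585 and toom3 at
   log(5)/log(3) ~= 1.465, so k=7 is indeed the first full product
   cheaper than toom3. */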
1886 /* Table of thresholds for successive modF FFT "k"s. The first entry is
1887 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
1888 etc. See mpn_fft_best_k(). */
1889 #ifndef MUL_FFT_TABLE
1890 #define MUL_FFT_TABLE \
1891 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
1892 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
1893 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
1894 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
1895 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
1896 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
1899 #ifndef SQR_FFT_TABLE
1900 #define SQR_FFT_TABLE \
1901 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
1902 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
1903 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
1904 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
1905 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
1906 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
1916 #ifndef FFT_TABLE_ATTRS
1917 #define FFT_TABLE_ATTRS static const
1920 #define MPN_FFT_TABLE_SIZE 16
1923 #ifndef DC_DIV_QR_THRESHOLD
1924 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD)
1927 #ifndef GET_STR_DC_THRESHOLD
1928 #define GET_STR_DC_THRESHOLD 18
1931 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
1932 #define GET_STR_PRECOMPUTE_THRESHOLD 35
1935 #ifndef SET_STR_DC_THRESHOLD
1936 #define SET_STR_DC_THRESHOLD 750
1939 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
1940 #define SET_STR_PRECOMPUTE_THRESHOLD 2000
1943 /* Return non-zero if xp,xsize and yp,ysize overlap.
1944 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
1945 overlap. If both these are false, there's an overlap. */
1946 #define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
1947 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
1948 #define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
1949 ( (char *) (xp) + (xsize) > (char *) (yp) \
1950 && (char *) (yp) + (ysize) > (char *) (xp))
1952 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
1953 overlapping. Return zero if they're partially overlapping. */
1954 #define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
1955 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
1956 #define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
1957 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
1959 /* Return non-zero if dst,dsize and src,ssize are either identical or
1960 overlapping in a way suitable for an incrementing/decrementing algorithm.
1961 Return zero if they're partially overlapping in an unsuitable fashion. */
1962 #define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
1963 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1964 #define MPN_SAME_OR_INCR_P(dst, src, size) \
1965 MPN_SAME_OR_INCR2_P(dst, size, src, size)
1966 #define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
1967 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
1968 #define MPN_SAME_OR_DECR_P(dst, src, size) \
1969 MPN_SAME_OR_DECR2_P(dst, size, src, size)
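/* Concrete behaviour of the overlap predicates (illustrative): */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <assert.h>
int
main (void)
{
  mp_limb_t buf[10];
  assert (  MPN_OVERLAP_P (buf, 5, buf + 3, 5));   /* [0,5) meets [3,8) */
  assert (! MPN_OVERLAP_P (buf, 3, buf + 3, 5));   /* [0,3) ends at [3,8) */
  assert (  MPN_SAME_OR_SEPARATE_P (buf, buf, 5)); /* identical is fine */
  assert (  MPN_SAME_OR_INCR_P (buf, buf + 2, 5)); /* dst below src: ok */
  assert (! MPN_SAME_OR_DECR_P (buf, buf + 2, 5)); /* but not for decr */
  return 0;
}
#endif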
1972 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
1973 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
1974 does it always. Generally assertions are meant for development, but
1975 might help when looking for a problem later too.
1977 Note that strings shouldn't be used within the ASSERT expression,
1978 eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
1979 used in the !HAVE_STRINGIZE case (ie. K&R). */
1982 #define ASSERT_LINE __LINE__
1984 #define ASSERT_LINE -1
1988 #define ASSERT_FILE __FILE__
1990 #define ASSERT_FILE ""
1993 __GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
1994 __GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
1997 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
1999 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
2002 #define ASSERT_ALWAYS(expr) \
2005 ASSERT_FAIL (expr); \
2009 #define ASSERT(expr) ASSERT_ALWAYS (expr)
2011 #define ASSERT(expr) do {} while (0)
2015 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2016 that it's zero. In both cases if assertion checking is disabled the
2017 expression is still evaluated. These macros are meant for use with
2018 routines like mpn_add_n() where the return value represents a carry or
2019 whatever that should or shouldn't occur in some context. For example,
2020 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2022 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
2023 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
2025 #define ASSERT_CARRY(expr) (expr)
2026 #define ASSERT_NOCARRY(expr) (expr)
2030 /* ASSERT_CODE includes code when assertion checking is wanted. This is the
2031 same as writing "#if WANT_ASSERT", but more compact. */
2033 #define ASSERT_CODE(expr) expr
2035 #define ASSERT_CODE(expr)
2039 /* Test that an mpq_t is in fully canonical form. This can be used as
2040 protection on routines like mpq_equal which give wrong results on
2041 non-canonical inputs. */
2043 #define ASSERT_MPQ_CANONICAL(q) \
2045 ASSERT (q->_mp_den._mp_size > 0); \
2046 if (q->_mp_num._mp_size == 0) \
2048 /* zero should be 0/1 */ \
2049 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
2053 /* no common factors */ \
2056 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
2057 ASSERT (mpz_cmp_ui (__g, 1) == 0); \
2062 #define ASSERT_MPQ_CANONICAL(q) do {} while (0)
2065 /* Check that the nail parts are zero. */
2066 #define ASSERT_ALWAYS_LIMB(limb) \
2068 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
2069 ASSERT_ALWAYS (__nail == 0); \
2071 #define ASSERT_ALWAYS_MPN(ptr, size) \
2073 /* let whole loop go dead when no nails */ \
2074 if (GMP_NAIL_BITS != 0) \
2077 for (__i = 0; __i < (size); __i++) \
2078 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
2082 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
2083 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
2085 #define ASSERT_LIMB(limb) do {} while (0)
2086 #define ASSERT_MPN(ptr, size) do {} while (0)
2090 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
2091 size==0 is allowed, and in that case {ptr,size} is considered to be zero. */
2093 #define ASSERT_MPN_ZERO_P(ptr,size) \
2096 ASSERT ((size) >= 0); \
2097 for (__i = 0; __i < (size); __i++) \
2098 ASSERT ((ptr)[__i] == 0); \
2100 #define ASSERT_MPN_NONZERO_P(ptr,size) \
2103 int __nonzero = 0; \
2104 ASSERT ((size) >= 0); \
2105 for (__i = 0; __i < (size); __i++) \
2106 if ((ptr)[__i] != 0) \
2111 ASSERT (__nonzero); \
2114 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
2115 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
2119 #if ! HAVE_NATIVE_mpn_com
2121 #define mpn_com(d,s,n) \
2124 mp_srcptr __s = (s); \
2125 mp_size_t __n = (n); \
2126 ASSERT (__n >= 1); \
2127 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
2129 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
2134 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \
2136 mp_srcptr __up = (up); \
2137 mp_srcptr __vp = (vp); \
2138 mp_ptr __rp = (rp); \
2139 mp_size_t __n = (n); \
2140 mp_limb_t __a, __b; \
2142 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \
2143 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \
2151 __rp[__n] = operation; \
2156 #if ! HAVE_NATIVE_mpn_and_n
2158 #define mpn_and_n(rp, up, vp, n) \
2159 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2162 #if ! HAVE_NATIVE_mpn_andn_n
2164 #define mpn_andn_n(rp, up, vp, n) \
2165 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2168 #if ! HAVE_NATIVE_mpn_nand_n
2170 #define mpn_nand_n(rp, up, vp, n) \
2171 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2174 #if ! HAVE_NATIVE_mpn_ior_n
2176 #define mpn_ior_n(rp, up, vp, n) \
2177 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2180 #if ! HAVE_NATIVE_mpn_iorn_n
2182 #define mpn_iorn_n(rp, up, vp, n) \
2183 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2186 #if ! HAVE_NATIVE_mpn_nior_n
2188 #define mpn_nior_n(rp, up, vp, n) \
2189 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2192 #if ! HAVE_NATIVE_mpn_xor_n
2194 #define mpn_xor_n(rp, up, vp, n) \
2195 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2198 #if ! HAVE_NATIVE_mpn_xnor_n
2200 #define mpn_xnor_n(rp, up, vp, n) \
2201 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
2204 #define mpn_trialdiv __MPN(trialdiv)
2205 __GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
2207 #define mpn_remove __MPN(remove)
2208 __GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t));
2211 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2212 #if GMP_NAIL_BITS == 0
2213 #define ADDC_LIMB(cout, w, x, y) \
2215 mp_limb_t __x = (x); \
2216 mp_limb_t __y = (y); \
2217 mp_limb_t __w = __x + __y; \
2219 (cout) = __w < __x; \
2222 #define ADDC_LIMB(cout, w, x, y) \
2228 (w) = __w & GMP_NUMB_MASK; \
2229 (cout) = __w >> GMP_NUMB_BITS; \
2233 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2235 #if GMP_NAIL_BITS == 0
2236 #define SUBC_LIMB(cout, w, x, y) \
2238 mp_limb_t __x = (x); \
2239 mp_limb_t __y = (y); \
2240 mp_limb_t __w = __x - __y; \
2242 (cout) = __w > __x; \
2245 #define SUBC_LIMB(cout, w, x, y) \
2247 mp_limb_t __w = (x) - (y); \
2248 (w) = __w & GMP_NUMB_MASK; \
2249 (cout) = __w >> (GMP_LIMB_BITS-1); \
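/* Behaviour check for the nail-free forms (illustrative): carry is "the sum
   wrapped below an addend", borrow is "the difference above the minuend". */
#if 0 /* standalone illustration */
#include <assert.h>
int
main (void)
{
  mp_limb_t w, c;
  ADDC_LIMB (c, w, MP_LIMB_T_MAX, CNST_LIMB(2));  /* wraps: w == 1 */
  assert (c == 1 && w == 1);
  SUBC_LIMB (c, w, CNST_LIMB(0), CNST_LIMB(1));   /* borrows: w == MAX */
  assert (c == 1 && w == MP_LIMB_T_MAX);
  return 0;
}
#endif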
2254 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2255 expecting no carry (or borrow) from that.
2257 The size parameter is only for the benefit of assertion checking. In a
2258 normal build it's unused and the carry/borrow is just propagated as far
2259 as it needs to go.
2261 On random data, usually only one or two limbs of {ptr,size} get updated,
2262 so there's no need for any sophisticated looping, just something compact
2263 and sensible.
2265 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2266 declaring their operand sizes, then remove the former. This is purely
2267 for the benefit of assertion checking. */
2269 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
2270 && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
2271 /* Better flags handling than the generic C gives on i386, saving a few
2272 bytes of code and maybe a cycle or two. */
2274 #define MPN_IORD_U(ptr, incr, aors) \
2276 mp_ptr __ptr_dummy; \
2277 if (__builtin_constant_p (incr) && (incr) == 1) \
2279 __asm__ __volatile__ \
2280 ("\n" ASM_L(top) ":\n" \
2281 "\t" aors " $1, (%0)\n" \
2282 "\tleal 4(%0),%0\n" \
2283 "\tjc " ASM_L(top) \
2284 : "=r" (__ptr_dummy) \
2290 __asm__ __volatile__ \
2291 ( aors " %2,(%0)\n" \
2292 "\tjnc " ASM_L(done) "\n" \
2294 "\t" aors " $1,4(%0)\n" \
2295 "\tleal 4(%0),%0\n" \
2296 "\tjc " ASM_L(top) "\n" \
2298 : "=r" (__ptr_dummy) \
2305 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
2306 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
2307 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
2308 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
2311 #if GMP_NAIL_BITS == 0
2313 #define mpn_incr_u(p,incr) \
2317 if (__builtin_constant_p (incr) && (incr) == 1) \
2319 while (++(*(__p++)) == 0) \
2324 __x = *__p + (incr); \
2327 while (++(*(++__p)) == 0) \
2333 #define mpn_decr_u(p,incr) \
2337 if (__builtin_constant_p (incr) && (incr) == 1) \
2339 while ((*(__p++))-- == 0) \
2345 *__p = __x - (incr); \
2347 while ((*(++__p))-- == 0) \
2354 #if GMP_NAIL_BITS >= 1
2356 #define mpn_incr_u(p,incr) \
2360 if (__builtin_constant_p (incr) && (incr) == 1) \
2364 __x = (*__p + 1) & GMP_NUMB_MASK; \
2371 __x = (*__p + (incr)); \
2372 *__p++ = __x & GMP_NUMB_MASK; \
2373 if (__x >> GMP_NUMB_BITS != 0) \
2377 __x = (*__p + 1) & GMP_NUMB_MASK; \
2386 #define mpn_decr_u(p,incr) \
2390 if (__builtin_constant_p (incr) && (incr) == 1) \
2395 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2401 __x = *__p - (incr); \
2402 *__p++ = __x & GMP_NUMB_MASK; \
2403 if (__x >> GMP_NUMB_BITS != 0) \
2408 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2419 #define MPN_INCR_U(ptr, size, n) \
2421 ASSERT ((size) >= 1); \
2422 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
2425 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
2431 #define MPN_DECR_U(ptr, size, n) \
2433 ASSERT ((size) >= 1); \
2434 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
2437 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
2442 /* Structure for conversion between internal binary format and
2443 strings in base 2..36. */
2446 /* Number of digits in the conversion base that always fits in an mp_limb_t.
2447 For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2448 is 9, since 10**9 is the largest power of the base that fits into a mp_limb_t. */
2451 /* log(2)/log(conversion_base) */
2452 double chars_per_bit_exactly;
2454 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2455 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
2456 i.e. the number of bits used to represent each digit in the base. */
2459 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2460 fixed-point number. Instead of dividing by big_base an application can
2461 choose to multiply by big_base_inverted. */
2462 mp_limb_t big_base_inverted;
2465 #define mp_bases __MPN(bases)
2466 __GMP_DECLSPEC extern const struct bases mp_bases[257];
2469 /* For power of 2 bases this is exact. For other bases the result is either
2470 exact or one too big.
2472 To be exact always it'd be necessary to examine all the limbs of the
2473 operand, since numbers like 100..000 and 99...999 generally differ only
2474 in the lowest limb. It'd be possible to examine just a couple of high
2475 limbs to increase the probability of being exact, but that doesn't seem
2476 worth bothering with. */
2478 #define MPN_SIZEINBASE(result, ptr, size, base) \
2480 int __lb_base, __cnt; \
2483 ASSERT ((size) >= 0); \
2484 ASSERT ((base) >= 2); \
2485 ASSERT ((base) < numberof (mp_bases)); \
2487 /* Special case for X == 0. */ \
2492 /* Calculate the total number of significant bits of X. */ \
2493 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2494 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2496 if (POW2_P (base)) \
2498 __lb_base = mp_bases[base].big_base; \
2499 (result) = (__totbits + __lb_base - 1) / __lb_base; \
2502 (result) = (size_t) \
2503 (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \
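/* Worked instance (illustrative): for base 10, chars_per_bit_exactly is
   log(2)/log(10) ~= 0.30103, so a 64-bit operand gets 64*0.30103 + 1 = 20
   digits -- exact here, since 2^64-1 = 18446744073709551615 is 20 digits,
   though in general the estimate can be one too big as noted.  For base 16
   the power-of-2 branch gives (64 + 4-1)/4 = 16 digits, always exact. */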
2507 /* eliminate mp_bases lookups for base==16 */
2508 #define MPN_SIZEINBASE_16(result, ptr, size) \
2511 mp_size_t __totbits; \
2513 ASSERT ((size) >= 0); \
2515 /* Special case for X == 0. */ \
2520 /* Calculate the total number of significant bits of X. */ \
2521 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2522 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2523 (result) = (__totbits + 4 - 1) / 4; \
2527 /* bit count to limb count, rounding up */
2528 #define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2530 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2531 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2532 in both cases (LIMBS_PER_ULONG is also defined here.) */
2533 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2535 #define LIMBS_PER_ULONG 1
2536 #define MPN_SET_UI(zp, zn, u) \
2537 (zp)[0] = (u); \
2538 (zn) = ((zp)[0] != 0);
2539 #define MPZ_FAKE_UI(z, zp, u) \
2540 (zp)[0] = (u); \
2541 PTR (z) = (zp); \
2542 SIZ (z) = ((zp)[0] != 0); \
2543 ASSERT_CODE (ALLOC (z) = 1);
2545 #else /* need two limbs per ulong */
2547 #define LIMBS_PER_ULONG 2
2548 #define MPN_SET_UI(zp, zn, u) \
2549 (zp)[0] = (u) & GMP_NUMB_MASK; \
2550 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2551 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2552 #define MPZ_FAKE_UI(z, zp, u) \
2553 (zp)[0] = (u) & GMP_NUMB_MASK; \
2554 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2555 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2556 PTR (z) = (zp); \
2557 ASSERT_CODE (ALLOC (z) = 2);
2562 #if HAVE_HOST_CPU_FAMILY_x86
2563 #define TARGET_REGISTER_STARVED 1
2565 #define TARGET_REGISTER_STARVED 0
2569 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2570 or 0 there into a limb 0xFF..FF or 0 respectively.
2572 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2573 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2574 a little compile-time test and a fallback to a "? :" form. The latter is
2575 necessary for instance on Cray vector systems.
2577 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2578 to an arithmetic right shift anyway, but it's good to get the desired
2579 shift on past versions too (in particular since an important use of
2580 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
2582 #define LIMB_HIGHBIT_TO_MASK(n) \
2583 (((mp_limb_signed_t) -1 >> 1) < 0 \
2584 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
2585 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
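/* For instance (illustrative), with 32-bit limbs both branches give
   LIMB_HIGHBIT_TO_MASK(0x80000000) == 0xFFFFFFFF and
   LIMB_HIGHBIT_TO_MASK(0x7FFFFFFF) == 0. */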
2588 /* Use a library function for invert_limb, if available. */
2589 #define mpn_invert_limb __MPN(invert_limb)
2590 __GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
2591 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2592 #define invert_limb(invxl,xl) \
2594 (invxl) = mpn_invert_limb (xl); \
2599 #define invert_limb(invxl,xl) \
2600 do { \
2601 mp_limb_t dummy; \
2602 ASSERT ((xl) != 0); \
2603 udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl); \
2604 } while (0)
2607 #define invert_pi1(dinv, d1, d0) \
2609 mp_limb_t v, p, t1, t0, mask; \
2610 invert_limb (v, d1); \
2616 mask = -(p >= d1); \
2621 umul_ppmm (t1, t0, d0, v); \
2626 if (UNLIKELY (p >= d1)) \
2628 if (p > d1 || t0 >= d0) \
2636 #ifndef udiv_qrnnd_preinv
2637 #define udiv_qrnnd_preinv udiv_qrnnd_preinv3
2640 /* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
2641 limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
2642 If this would yield overflow, DI should be the largest possible number
2643 (i.e., only ones). For correct operation, the most significant bit of D
2644 has to be set. Put the quotient in Q and the remainder in R. */
2645 #define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \
2647 mp_limb_t _q, _ql, _r; \
2648 mp_limb_t _xh, _xl; \
2649 ASSERT ((d) != 0); \
2650 umul_ppmm (_q, _ql, (nh), (di)); \
2651 _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \
2652 umul_ppmm (_xh, _xl, _q, (d)); \
2653 sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
2656 sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
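/* A toy instance of the scheme (illustrative), scaled to B = 2^8 so the
   numbers stay readable: for d = 200 (high bit set),
   di = floor((B^2-1)/d) - B = floor(65535/200) - 256 = 327 - 256 = 71.
   Dividing nh:nl = 195:80 (n = 50000):
   _q = hi(195*71) + 195 = floor(13845/256) + 195 = 54 + 195 = 249,
   remainder 50000 - 249*200 = 200 >= d, so one adjustment step lands on
   q = 250, r = 0, which is right: 50000 = 250 * 200. */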
2673 /* Like udiv_qrnnd_preinv, but branch-free. */
2674 #define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \
2676 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
2677 mp_limb_t _xh, _xl; \
2678 _n2 = (nh); \
2679 _n10 = (nl); \
2680 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
2681 _nadj = _n10 + (_nmask & (d)); \
2682 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
2683 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
2685 umul_ppmm (_xh, _xl, _q1, d); \
2686 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
2687 _xh -= (d); /* xh = 0 or -1 */ \
2688 (r) = _xl + ((d) & _xh); \
2692 /* Like udiv_qrnnd_preinv2, but for any value D. DNORM is D shifted left
2693 so that its most significant bit is set. LGUP is ceil(log2(D)). */
2694 #define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
2696 mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
2697 mp_limb_t _xh, _xl; \
2698 _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> ((lgup) - 1)); \
2699 _n10 = (nl) << (GMP_LIMB_BITS - (lgup)); \
2700 _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
2701 _nadj = _n10 + (_nmask & (dnorm)); \
2702 umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
2703 add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
2705 umul_ppmm (_xh, _xl, _q1, d); \
2706 add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
2708 (r) = _xl + ((d) & _xh); \
2712 /* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
2714 We write things strangely below, to help gcc. A more straightforward
2717 _r = (nl) - _qh * (d);
2725 For one operation shorter critical path, one may want to use this form:
2737 #define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
2739 mp_limb_t _qh, _ql, _r; \
2740 umul_ppmm (_qh, _ql, (nh), (di)); \
2741 if (__builtin_constant_p (nl) && (nl) == 0) \
2744 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
2745 _r = (nl) - _qh * (d); \
2746 if (_r > _ql) /* both > and >= should be OK */ \
2751 if (UNLIKELY (_r >= (d))) \
2760 /* Compute r = nh*B mod d, where di is the inverse of d. */
2761 #define udiv_rnd_preinv(r, nh, d, di) \
2763 mp_limb_t _qh, _ql, _r; \
2764 umul_ppmm (_qh, _ql, (nh), (di)); \
2772 /* Compute the quotient and remainder for n / d. Requires d
2773 >= B^2 / 2 and n < d B. di is the inverse
2775 floor ((B^3 - 1) / (d0 + d1 B)) - B.
2777 NOTE: Output variables are updated multiple times. Only some inputs
2778 and outputs may overlap.
2780 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
2782 mp_limb_t _q0, _t1, _t0, _mask; \
2783 umul_ppmm ((q), _q0, (n2), (dinv)); \
2784 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
2786 /* Compute the two most significant limbs of n - q'd */ \
2787 (r1) = (n1) - (d1) * (q); \
2789 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
2790 umul_ppmm (_t1, _t0, (d0), (q)); \
2791 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
2794 /* Conditionally adjust q and the remainders */ \
2795 _mask = - (mp_limb_t) ((r1) >= _q0); \
2797 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
2798 if (UNLIKELY ((r1) >= (d1))) \
2800 if ((r1) > (d1) || (r0) >= (d0)) \
2803 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
2808 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
2809 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
2810 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
2814 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
2815 plain mpn_divrem_1. The default is yes, since the few CISC chips where
2816 preinv is not good have defines saying so. */
2817 #ifndef USE_PREINV_DIVREM_1
2818 #define USE_PREINV_DIVREM_1 1
2821 #if USE_PREINV_DIVREM_1
2822 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
2823 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
2825 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
2826 mpn_divrem_1 (qp, xsize, ap, size, d)
2829 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
2830 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
2833 /* This selection may seem backwards. The reason mpn_mod_1 typically takes
2834 over for larger sizes is that it uses the mod_1_1 function. */
2835 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
2836 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
2837 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
2838 : mpn_mod_1 (src, size, divisor))
2841 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
2842 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
2843 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
2847 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
2848 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
2849 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
2850 modexact are faster at all sizes, so the defaults are 0. Those CPUs
2851 where this is not right have a tuned threshold. */
2852 #ifndef DIVEXACT_1_THRESHOLD
2853 #define DIVEXACT_1_THRESHOLD 0
2855 #ifndef BMOD_1_TO_MOD_1_THRESHOLD
2856 #define BMOD_1_TO_MOD_1_THRESHOLD 10
2859 #ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
2860 #define mpn_divexact_1 __MPN(divexact_1)
2861 __GMP_DECLSPEC void mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
2864 #define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \
2866 if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \
2867 ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
2870 ASSERT (mpn_mod_1 (src, size, divisor) == 0); \
2871 mpn_divexact_1 (dst, src, size, divisor); \
2875 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
2876 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
2877 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2880 #if HAVE_NATIVE_mpn_modexact_1_odd
2881 #define mpn_modexact_1_odd __MPN(modexact_1_odd)
2882 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
2884 #define mpn_modexact_1_odd(src,size,divisor) \
2885 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
2888 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
2889 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
2890 ? mpn_modexact_1_odd (src, size, divisor) \
2891 : mpn_mod_1 (src, size, divisor))
2893 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
2894 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
2895 n must be odd (otherwise such an inverse doesn't exist).
2897 This is not to be confused with invert_limb(), which is completely
2898 different.
2900 The table lookup gives an inverse with the low 8 bits valid, and each
2901 multiply step doubles the number of bits. See Jebelean "An algorithm for
2902 exact division" end of section 4 (reference in gmp.texi).
2904 Possible enhancement: Could use UHWtype until the last step, if half-size
2905 multiplies are faster (might help under _LONG_LONG_LIMB).
2907 Alternative: As noted in Granlund and Montgomery "Division by Invariant
2908 Integers using Multiplication" (reference in gmp.texi), n itself gives a
2909 3-bit inverse immediately, and could be used instead of a table lookup.
2910 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
2911 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
2913 #define binvert_limb_table __gmp_binvert_limb_table
2914 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128];
2916 #define binvert_limb(inv,n) \
2918 mp_limb_t __n = (n); \
2920 ASSERT ((__n & 1) == 1); \
2922 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \
2923 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \
2924 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \
2925 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \
2927 if (GMP_NUMB_BITS > 64) \
2929 int __invbits = 64; \
2930 do { \
2931 __inv = 2 * __inv - __inv * __inv * __n; \
2932 __invbits *= 2; \
2933 } while (__invbits < GMP_NUMB_BITS); \
2936 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
2937 (inv) = __inv & GMP_NUMB_MASK; \
2939 #define modlimb_invert binvert_limb /* backward compatibility */
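/* The quadratic convergence is easy to check standalone (illustrative):
   any odd n is its own inverse mod 8, i.e. 3 valid bits, and each
   inv = 2*inv - inv*inv*n step doubles the count of valid low bits. */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <assert.h>
#include <stdint.h>
int
main (void)
{
  uint32_t n = 0x12345679u;   /* any odd value */
  uint32_t inv = n;           /* n*n == 1 (mod 8): 3 bits correct */
  int i;
  for (i = 0; i < 4; i++)     /* 3 -> 6 -> 12 -> 24 -> 48 bits */
    inv = 2 * inv - inv * inv * n;
  assert ((uint32_t) (inv * n) == 1);
  return 0;
}
#endif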
2941 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
2942 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
2943 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
2944 we need to start from GMP_NUMB_MAX>>1. */
2945 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
2947 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
2948 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
2949 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
2950 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
2953 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
2955 It's not clear whether this is the best way to do this calculation.
2956 Anything congruent to -a would be fine for the one limb congruence
2959 #define NEG_MOD(r, a, d) \
2961 ASSERT ((d) != 0); \
2967 /* small a is reasonably likely */ \
2973 mp_limb_t __dnorm; \
2974 count_leading_zeros (__twos, d); \
2975 __twos -= GMP_NAIL_BITS; \
2976 __dnorm = (d) << __twos; \
2977 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
2983 /* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
2984 #define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1)
2987 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
2988 to 0 if there's an even number. "n" should be an unsigned long and "p"
2989 an int. */
2991 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
2992 #define ULONG_PARITY(p, n) \
2995 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
3000 /* Cray intrinsic _popcnt. */
3002 #define ULONG_PARITY(p, n) \
3004 (p) = _popcnt (n) & 1; \
3008 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3009 && ! defined (NO_ASM) && defined (__ia64)
3010 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
3011 to a 64 bit unsigned long long for popcnt */
3012 #define ULONG_PARITY(p, n) \
3014 unsigned long long __n = (unsigned long) (n); \
3016 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
3021 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3022 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3023 #if __GMP_GNUC_PREREQ (3,1)
3024 #define __GMP_qm "=Qm"
3025 #define __GMP_q "=Q"
3027 #define __GMP_qm "=qm"
3028 #define __GMP_q "=q"
3030 #define ULONG_PARITY(p, n) \
3033 unsigned long __n = (n); \
3034 __n ^= (__n >> 16); \
3035 __asm__ ("xorb %h1, %b1\n\t" \
3037 : __GMP_qm (__p), __GMP_q (__n) \
3043 #if ! defined (ULONG_PARITY)
3044 #define ULONG_PARITY(p, n) \
3046 unsigned long __n = (n); \
3050 __p ^= 0x96696996L >> (__n & 0x1F); \
3060 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
3061 version 3.1 at least) doesn't seem to know how to generate rlwimi for
3062 anything other than bit-fields, so use "asm". */
3063 #if defined (__GNUC__) && ! defined (NO_ASM) \
3064 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3065 #define BSWAP_LIMB(dst, src) \
3067 mp_limb_t __bswapl_src = (src); \
3068 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
3069 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
3070 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
3071 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
3072 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 2nd high */ \
3073 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
3074 (dst) = __tmp1 | __tmp2; /* whole */ \
3078 /* bswap is available on i486 and up and is fast. A combination rorw $8 /
3079 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3080 kernel with xchgb instead of rorw), but this is not done here, because
3081 i386 means generic x86 and mixing word and dword operations will cause
3082 partial register stalls on P6 chips. */
3083 #if defined (__GNUC__) && ! defined (NO_ASM) \
3084 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
3085 && GMP_LIMB_BITS == 32
3086 #define BSWAP_LIMB(dst, src) \
3088 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
3092 #if defined (__GNUC__) && ! defined (NO_ASM) \
3093 && defined (__amd64__) && GMP_LIMB_BITS == 64
3094 #define BSWAP_LIMB(dst, src) \
3096 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
3100 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3101 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3102 #define BSWAP_LIMB(dst, src) \
3104 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
3109 #if defined (__GNUC__) && ! defined (NO_ASM) \
3110 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3111 #define BSWAP_LIMB(dst, src) \
3113 mp_limb_t __bswapl_src = (src); \
3114 __asm__ ("ror%.w %#8, %0\n\t" \
3118 : "0" (__bswapl_src)); \
3122 #if ! defined (BSWAP_LIMB)
3123 #if GMP_LIMB_BITS == 8
3124 #define BSWAP_LIMB(dst, src) \
3125 do { (dst) = (src); } while (0)
3127 #if GMP_LIMB_BITS == 16
3128 #define BSWAP_LIMB(dst, src) \
3130 (dst) = ((src) << 8) + ((src) >> 8); \
3133 #if GMP_LIMB_BITS == 32
3134 #define BSWAP_LIMB(dst, src) \
3138 + (((src) & 0xFF00) << 8) \
3139 + (((src) >> 8) & 0xFF00) \
3143 #if GMP_LIMB_BITS == 64
3144 #define BSWAP_LIMB(dst, src) \
3148 + (((src) & 0xFF00) << 40) \
3149 + (((src) & 0xFF0000) << 24) \
3150 + (((src) & 0xFF000000) << 8) \
3151 + (((src) >> 8) & 0xFF000000) \
3152 + (((src) >> 24) & 0xFF0000) \
3153 + (((src) >> 40) & 0xFF00) \
3159 #if ! defined (BSWAP_LIMB)
3160 #define BSWAP_LIMB(dst, src) \
3162 mp_limb_t __bswapl_src = (src); \
3163 mp_limb_t __dst = 0; \
3165 for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
3167 __dst = (__dst << 8) | (__bswapl_src & 0xFF); \
3168 __bswapl_src >>= 8; \
3175 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3176 those we know are fast. */
3177 #if defined (__GNUC__) && ! defined (NO_ASM) \
3178 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3179 && (HAVE_HOST_CPU_powerpc604 \
3180 || HAVE_HOST_CPU_powerpc604e \
3181 || HAVE_HOST_CPU_powerpc750 \
3182 || HAVE_HOST_CPU_powerpc7400)
3183 #define BSWAP_LIMB_FETCH(limb, src) \
3185 mp_srcptr __blf_src = (src); \
3187 __asm__ ("lwbrx %0, 0, %1" \
3189 : "r" (__blf_src), \
3190 "m" (*__blf_src)); \
3195 #if ! defined (BSWAP_LIMB_FETCH)
3196 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
3200 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3201 know are fast. FIXME: Is this necessary? */
3202 #if defined (__GNUC__) && ! defined (NO_ASM) \
3203 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3204 && (HAVE_HOST_CPU_powerpc604 \
3205 || HAVE_HOST_CPU_powerpc604e \
3206 || HAVE_HOST_CPU_powerpc750 \
3207 || HAVE_HOST_CPU_powerpc7400)
3208 #define BSWAP_LIMB_STORE(dst, limb) \
3210 mp_ptr __dst = (dst); \
3211 mp_limb_t __limb = (limb); \
3212 __asm__ ("stwbrx %1, 0, %2" \
3219 #if ! defined (BSWAP_LIMB_STORE)
3220 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
3224 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3225 #define MPN_BSWAP(dst, src, size) \
3227 mp_ptr __dst = (dst); \
3228 mp_srcptr __src = (src); \
3229 mp_size_t __size = (size); \
3231 ASSERT ((size) >= 0); \
3232 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
3233 CRAY_Pragma ("_CRI ivdep"); \
3234 for (__i = 0; __i < __size; __i++) \
3236 BSWAP_LIMB_FETCH (*__dst, __src); \
3242 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3243 #define MPN_BSWAP_REVERSE(dst, src, size) \
3245 mp_ptr __dst = (dst); \
3246 mp_size_t __size = (size); \
3247 mp_srcptr __src = (src) + __size - 1; \
3249 ASSERT ((size) >= 0); \
3250 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
3251 CRAY_Pragma ("_CRI ivdep"); \
3252 for (__i = 0; __i < __size; __i++) \
3254 BSWAP_LIMB_FETCH (*__dst, __src); \
3261 /* No processor claiming to be SPARC v9 compliant seems to
3262 implement the POPC instruction. Disable pattern for now. */
3264 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3265 #define popc_limb(result, input) \
3268 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
3273 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3274 #define popc_limb(result, input) \
3276 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
3280 /* Cray intrinsic. */
3282 #define popc_limb(result, input) \
3284 (result) = _popcnt (input); \
3288 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3289 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3290 #define popc_limb(result, input) \
3292 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
3296 /* Cool population count of an mp_limb_t.
3297 You have to figure out how this works; we won't tell you!
3299 The constants could also be expressed as:
3300 0x55... = [2^N / 3] = [(2^N-1)/3]
3301 0x33... = [2^N / 5] = [(2^N-1)/5]
3302 0x0f... = [2^N / 17] = [(2^N-1)/17]
3303 (N is GMP_LIMB_BITS, [] denotes truncation.) */
3305 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3306 #define popc_limb(result, input) \
3308 mp_limb_t __x = (input); \
3309 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3310 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3311 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3312 (result) = __x & 0xff; \
3316 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3317 #define popc_limb(result, input) \
3319 mp_limb_t __x = (input); \
3320 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3321 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3322 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3323 __x = ((__x >> 8) + __x); \
3324 (result) = __x & 0xff; \
3328 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3329 #define popc_limb(result, input) \
3331 mp_limb_t __x = (input); \
3332 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3333 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3334 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3335 __x = ((__x >> 8) + __x); \
3336 __x = ((__x >> 16) + __x); \
3337 (result) = __x & 0xff; \
3341 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3342 #define popc_limb(result, input) \
3344 mp_limb_t __x = (input); \
3345 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3346 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3347 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3348 __x = ((__x >> 8) + __x); \
3349 __x = ((__x >> 16) + __x); \
3350 __x = ((__x >> 32) + __x); \
3351 (result) = __x & 0xff; \
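/* The masks are the usual SWAR popcount constants: MP_LIMB_T_MAX/3 is
   0x55...55, /5 is 0x33...33, /17 is 0x0F...0F.  A quick check of the
   32- or 64-bit flavours (illustrative): */
#if 0 /* standalone illustration */
#include <assert.h>
int
main (void)
{
  mp_limb_t r;
  popc_limb (r, CNST_LIMB(0xF0F0F0F0));  /* sixteen 1 bits */
  assert (r == 16);
  return 0;
}
#endif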
3356 /* Define stuff for longlong.h. */
3357 #if HAVE_ATTRIBUTE_MODE
3358 typedef unsigned int UQItype __attribute__ ((mode (QI)));
3359 typedef int SItype __attribute__ ((mode (SI)));
3360 typedef unsigned int USItype __attribute__ ((mode (SI)));
3361 typedef int DItype __attribute__ ((mode (DI)));
3362 typedef unsigned int UDItype __attribute__ ((mode (DI)));
3364 typedef unsigned char UQItype;
3365 typedef long SItype;
3366 typedef unsigned long USItype;
3368 typedef long long int DItype;
3369 typedef unsigned long long int UDItype;
3370 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
3371 typedef long int DItype;
3372 typedef unsigned long int UDItype;
3376 typedef mp_limb_t UWtype;
3377 typedef unsigned int UHWtype;
3378 #define W_TYPE_SIZE GMP_LIMB_BITS
3380 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3382 Bit field packing is "implementation defined" according to C99, which
3383 leaves us at the compiler's mercy here. For some systems packing is
3384 defined in the ABI (eg. x86). In any case so far it seems universal that
3385 little endian systems pack from low to high, and big endian from high to
3386 low within the given type.
3388 Within the fields we rely on the integer endianness being the same as the
3389 float endianness, this is true everywhere we know of and it'd be a fairly
3390 strange system that did anything else. */
3392 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3393 #define _GMP_IEEE_FLOATS 1
3394 union ieee_double_extract
3398 gmp_uint_least32_t manh:20;
3399 gmp_uint_least32_t exp:11;
3400 gmp_uint_least32_t sig:1;
3401 gmp_uint_least32_t manl:32;
3407 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3408 #define _GMP_IEEE_FLOATS 1
3409 union ieee_double_extract
3413 gmp_uint_least32_t manl:32;
3414 gmp_uint_least32_t manh:20;
3415 gmp_uint_least32_t exp:11;
3416 gmp_uint_least32_t sig:1;
3422 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3423 #define _GMP_IEEE_FLOATS 1
3424 union ieee_double_extract
3428 gmp_uint_least32_t sig:1;
3429 gmp_uint_least32_t exp:11;
3430 gmp_uint_least32_t manh:20;
3431 gmp_uint_least32_t manl:32;
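/* Field check (illustrative, assuming one of the IEEE layouts above, with
   the union's double member named d as DOUBLE_NAN_INF_ACTION uses below):
   1.0 carries biased exponent 1023 and a zero mantissa; -2.0 sets sig with
   exponent 1024. */
#if 0 /* standalone illustration, not compiled as part of gmp-impl.h */
#include <assert.h>
int
main (void)
{
  union ieee_double_extract u;
  u.d = 1.0;
  assert (u.s.exp == 1023 && u.s.manh == 0 && u.s.manl == 0 && u.s.sig == 0);
  u.d = -2.0;
  assert (u.s.exp == 1024 && u.s.sig == 1);
  return 0;
}
#endif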
3438 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3439 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
3440 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3441 /* Maximum number of limbs it will take to store any `double'.
3442 We assume doubles have 53 mantissa bits. */
3443 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3445 __GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
3447 #define mpn_get_d __gmpn_get_d
3448 __GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
3451 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3452 a_inf if x is an infinity. Both are considered unlikely values, for
3453 branch prediction. */
3455 #if _GMP_IEEE_FLOATS
3456 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3458 union ieee_double_extract u; \
3459 u.d = (x); \
3460 if (UNLIKELY (u.s.exp == 0x7FF)) \
3462 if (u.s.manl == 0 && u.s.manh == 0) \
3470 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3471 /* no nans or infs in these formats */
3472 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3476 #ifndef DOUBLE_NAN_INF_ACTION
3477 /* Unknown format, try something generic.
3478 NaN should be "unordered", so x!=x.
3479 Inf should be bigger than DBL_MAX. */
3480 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3483 if (UNLIKELY ((x) != (x))) \
3485 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
3491 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3492 in the coprocessor, which means a bigger exponent range than normal, and
3493 depending on the rounding mode, a bigger mantissa than normal. (See
3494 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
3495 "d" through memory to force any rounding and overflows to occur.
3497 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3498 registers, where there's no such extra precision and no need for the
3499 FORCE_DOUBLE. We don't bother to detect this since the present uses for
3500 FORCE_DOUBLE are only in test programs and default generic C code.
3502 Not quite sure that an "automatic volatile" will use memory, but it does
3503 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3504 apparently matching operands like "0" are only allowed on a register
3505 output. gcc 3.4 warns about this, though in fact it and past versions
3506 seem to put the operand through memory as hoped. */
3508 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
3509 || defined (__amd64__))
3510 #define FORCE_DOUBLE(d) \
3511 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3513 #define FORCE_DOUBLE(d) do { } while (0)
3517 __GMP_DECLSPEC extern int __gmp_junk;
3518 __GMP_DECLSPEC extern const int __gmp_0;
3519 __GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
3520 __GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3521 __GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3522 __GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
3523 #define GMP_ERROR(code) __gmp_exception (code)
3524 #define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
3525 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
3527 #if defined _LONG_LONG_LIMB
3528 #if __GMP_HAVE_TOKEN_PASTE
3529 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3531 #define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
3533 #else /* not _LONG_LONG_LIMB */
3534 #if __GMP_HAVE_TOKEN_PASTE
3535 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3537 #define CNST_LIMB(C) ((mp_limb_t) C/**/L)
3539 #endif /* _LONG_LONG_LIMB */
3541 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3542 #if GMP_NUMB_BITS == 2
3543 #define PP 0x3 /* 3 */
3544 #define PP_FIRST_OMITTED 5
3546 #if GMP_NUMB_BITS == 4
3547 #define PP 0xF /* 3 x 5 */
3548 #define PP_FIRST_OMITTED 7
3550 #if GMP_NUMB_BITS == 8
3551 #define PP 0x69 /* 3 x 5 x 7 */
3552 #define PP_FIRST_OMITTED 11
3554 #if GMP_NUMB_BITS == 16
3555 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
3556 #define PP_FIRST_OMITTED 17
3558 #if GMP_NUMB_BITS == 32
3559 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
3560 #define PP_INVERTED 0x53E5645CL
3561 #define PP_FIRST_OMITTED 31
3563 #if GMP_NUMB_BITS == 64
3564 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
3565 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3566 #define PP_FIRST_OMITTED 59
3568 #ifndef PP_FIRST_OMITTED
3569 #define PP_FIRST_OMITTED 3
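/* For reference (illustrative): the 32-bit PP is literally the odd primorial
   3*5*7*11*13*17*19*23*29 = 3234846615 = 0xC0CFD797, PP_INVERTED is its
   precomputed invert_limb()-style reciprocal (PP already has its high bit
   set), and 31 is the first prime not folded in, hence PP_FIRST_OMITTED. */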
3574 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3575 zero bit representing +1 and a one bit representing -1. Bits other than
3576 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
3577 used to ensure the expressions are "int"s even if a and/or b might be
3578 other types.
3580 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3581 and their speed is important. Expressions are used rather than
3582 conditionals to accumulate sign changes, which effectively means XORs
3583 instead of conditional JUMPs. */
3585 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3586 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3588 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3589 #define JACOBI_U0(a) ((a) == 1)
3591 /* (a/0), with a given by low and size;
3592 is 1 if a=+/-1, 0 otherwise */
3593 #define JACOBI_LS0(alow,asize) \
3594 (((asize) == 1 || (asize) == -1) && (alow) == 1)
3596 /* (a/0), with a an mpz_t;
3597 fetch of low limb always valid, even if size is zero */
3598 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
3600 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3601 #define JACOBI_0U(b) ((b) == 1)
3603 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3604 #define JACOBI_0S(b) ((b) == 1 || (b) == -1)
3606 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3607 #define JACOBI_0LS(blow,bsize) \
3608 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
3610 /* Convert a bit1 to +1 or -1. */
3611 #define JACOBI_BIT1_TO_PN(result_bit1) \
3612 (1 - ((int) (result_bit1) & 2))
3614 /* (2/b), with b unsigned and odd;
3615 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3616 hence obtained from (b>>1)^b */
3617 #define JACOBI_TWO_U_BIT1(b) \
3618 ((int) (((b) >> 1) ^ (b)))
3620 /* (2/b)^twos, with b unsigned and odd */
3621 #define JACOBI_TWOS_U_BIT1(twos, b) \
3622 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3624 /* (2/b)^twos, with b unsigned and odd */
3625 #define JACOBI_TWOS_U(twos, b) \
3626 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
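/* Spot check of the bit-1 encoding (illustrative): (2/7) = +1 since
   7 == 7 mod 8, while (2/3) = -1 since 3 == 3 mod 8. */
#if 0 /* standalone illustration */
#include <assert.h>
int
main (void)
{
  assert (JACOBI_BIT1_TO_PN (JACOBI_TWO_U_BIT1 (7)) == 1);
  assert (JACOBI_BIT1_TO_PN (JACOBI_TWO_U_BIT1 (3)) == -1);
  assert (JACOBI_TWOS_U (2, 3) == 1);    /* (2/3)^2 = +1 */
  assert (JACOBI_TWOS_U (3, 3) == -1);   /* (2/3)^3 = -1 */
  return 0;
}
#endif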
3628 /* (-1/b), with b odd (signed or unsigned);
3629 is (-1)^((b-1)/2) */
3630 #define JACOBI_N1B_BIT1(b) \
3631 ((int) (b))
3633 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3634 is (-1/b) if a<0, or +1 if a>=0 */
3635 #define JACOBI_ASGN_SU_BIT1(a, b) \
3636 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3638 /* (a/b) effect due to sign of b: signed/signed;
3639 is -1 if a and b both negative, +1 otherwise */
3640 #define JACOBI_BSGN_SS_BIT1(a, b) \
3641 ((((a)<0) & ((b)<0)) << 1)
3643 /* (a/b) effect due to sign of b: signed/mpz;
3644 is -1 if a and b both negative, +1 otherwise */
3645 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3646 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3648 /* (a/b) effect due to sign of b: mpz/signed;
3649 is -1 if a and b both negative, +1 otherwise */
3650 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3651 JACOBI_BSGN_SZ_BIT1 (b, a)
3653 /* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
3654 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
3655 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
3656 because this is used in a couple of places with only bit 1 of a or b
3657 known. */
3658 #define JACOBI_RECIP_UU_BIT1(a, b) \
3659 ((int) ((a) & (b)))
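/* Illustrative sketch (not the actual mpn_jacobi_base): how the BIT1 macros
   compose into a complete Jacobi symbol computation.  Sign flips accumulate
   by XOR in bit 1 of result_bit1, and a single conversion to +/-1 happens at
   the end.  demo_jacobi_uu is a hypothetical helper; b must be odd.  */
#if 0
static int
demo_jacobi_uu (unsigned a, unsigned b)  /* b odd */
{
  int result_bit1 = 0;
  while (a != 0)
    {
      while ((a & 1) == 0)
        {
          a >>= 1;
          result_bit1 ^= JACOBI_TWO_U_BIT1 (b);    /* factor of (2/b) */
        }
      result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);  /* switch to (b/a) */
      {
        unsigned t = a;
        a = b % a;
        b = t;
      }
    }
  return b == 1 ? JACOBI_BIT1_TO_PN (result_bit1) : 0;  /* 0 if gcd != 1 */
}
#endif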
3661 /* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
3662 decrementing b_size. b_low should be b_ptr[0] on entry, and will be
3663 updated for the new b_ptr. result_bit1 is updated according to the
3664 factors of 2 stripped, as per (a/2). */
3665 #define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
3666 do { \
3667 ASSERT ((b_size) >= 1); \
3668 ASSERT ((b_low) == (b_ptr)[0]); \
3670 while (UNLIKELY ((b_low) == 0)) \
3671 { \
3672 (b_size)--; \
3673 ASSERT ((b_size) >= 1); \
3674 (b_ptr)++; \
3675 (b_low) = *(b_ptr); \
3677 ASSERT (((a) & 1) != 0); \
3678 if ((GMP_NUMB_BITS % 2) == 1) \
3679 (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
3680 } \
3681 } while (0)
3683 /* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
3684 modexact_1_odd, but in either case leaving a_rem<b. b must be odd and
3685 unsigned. modexact_1_odd effectively calculates -a mod b, and
3686 result_bit1 is adjusted for the factor of -1.
3688 The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
3689 sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
3690 factor to introduce into result_bit1, so for that case use mpn_mod_1
3691 unconditionally.
3693 FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
3694 for odd GMP_NUMB_BITS would be good. Perhaps it could mung its result,
3695 or not skip a divide step, or something. */
3697 #define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
3698 do { \
3699 mp_srcptr __a_ptr = (a_ptr); \
3700 mp_size_t __a_size = (a_size); \
3701 mp_limb_t __b = (b); \
3703 ASSERT (__a_size >= 1); \
3704 ASSERT (__b & 1); \
3706 if ((GMP_NUMB_BITS % 2) != 0 \
3707 || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
3708 { \
3709 (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
3710 } \
3711 else \
3712 { \
3713 (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
3714 (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
3715 } \
3716 } while (0)
3718 /* Matrix multiplication */
3719 #define mpn_matrix22_mul __MPN(matrix22_mul)
3720 __GMP_DECLSPEC void mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3721 #define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
3722 __GMP_DECLSPEC void mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
3723 #define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
3724 __GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
3726 #ifndef MATRIX22_STRASSEN_THRESHOLD
3727 #define MATRIX22_STRASSEN_THRESHOLD 30
3728 #endif
3730 /* HGCD definitions */
3732 /* Extract one numb, shifting count bits left
3733     ________  ________
3734    |___xh___||___xl___|
3735          |____r____|
3736    >count <
3738 The count includes any nail bits, so it should work fine if count
3739 is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
3740 xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
3742 FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
3743 those calls where the count high bits of xh may be non-zero.
3744 */
3746 #define MPN_EXTRACT_NUMB(count, xh, xl) \
3747 ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
3748 ((xl) >> (GMP_LIMB_BITS - (count))))
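/* Illustrative use (an assumed helper, not from the sources): fetch the high
   GMP_NUMB_BITS of {xp,n} normalized so the top bit is set, with the shift
   count coming from count_leading_zeros as the comment above suggests.  */
#if 0
static mp_limb_t
demo_high_numb (mp_srcptr xp, mp_size_t n)  /* n >= 2, xp[n-1] != 0 */
{
  int cnt;
  count_leading_zeros (cnt, xp[n - 1]);
  /* MPN_EXTRACT_NUMB needs 0 < count, so an already normalized top limb
     is returned as is */
  return cnt == 0 ? xp[n - 1] : MPN_EXTRACT_NUMB (cnt, xp[n - 1], xp[n - 2]);
}
#endif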
3751 /* The matrix non-negative M = (u, u'; v,v') keeps track of the
3752 reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
3753 than a, b. The determinant must always be one, so that M has an
3754 inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
3755 bits. */
3757 struct hgcd_matrix1
3758 {
3759 mp_limb_t u[2][2];
3760 };
3761 #define mpn_hgcd2 __MPN (hgcd2)
3762 __GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
3764 #define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
3765 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3767 #define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
3768 __GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
3770 struct hgcd_matrix
3771 {
3772 mp_size_t alloc; /* for sanity checking only */
3773 mp_size_t n;
3774 mp_ptr p[2][2];
3775 };
3777 #define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
3779 #define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
3780 __GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
3782 #define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
3783 __GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
3785 #define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
3786 __GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3788 #define mpn_hgcd_itch __MPN (hgcd_itch)
3789 __GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
3791 #define mpn_hgcd __MPN (hgcd)
3792 __GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3794 #define MPN_HGCD_LEHMER_ITCH(n) (n)
3796 #define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
3797 __GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
3799 /* Needs storage for the quotient */
3800 #define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
3802 #define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
3803 __GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3805 #define MPN_GCD_LEHMER_N_ITCH(n) (n)
3807 #define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
3808 __GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3810 #define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
3811 __GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
3813 #define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
3815 #define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
3816 __GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
3818 /* 4*(an + 1) + 4*(bn + 1) + an */
3819 #define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
3821 #ifndef HGCD_THRESHOLD
3822 #define HGCD_THRESHOLD 400
3823 #endif
3825 #ifndef GCD_DC_THRESHOLD
3826 #define GCD_DC_THRESHOLD 1000
3827 #endif
3829 #ifndef GCDEXT_DC_THRESHOLD
3830 #define GCDEXT_DC_THRESHOLD 600
3831 #endif
3833 /* Definitions for mpn_set_str and mpn_get_str */
3834 struct powers
3835 {
3836 mp_ptr p; /* actual power value */
3837 mp_size_t n; /* # of limbs at p */
3838 mp_size_t shift; /* weight of lowest limb, in limb base B */
3839 size_t digits_in_base; /* number of corresponding digits */
3840 int base;
3841 };
3842 typedef struct powers powers_t;
3843 #define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
3844 #define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
3845 #define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
3846 #define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
3848 #define mpn_dc_set_str __MPN(dc_set_str)
3849 __GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
3850 #define mpn_bc_set_str __MPN(bc_set_str)
3851 __GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
3852 #define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
3853 __GMP_DECLSPEC void mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
3856 /* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
3857 limb and adds an extra limb. __GMPF_PREC_TO_BITS drops that extra limb,
3858 hence giving back the user's size in bits rounded up. Notice that
3859 converting prec->bits->prec gives an unchanged value. */
3860 #define __GMPF_BITS_TO_PREC(n) \
3861 ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
3862 #define __GMPF_PREC_TO_BITS(n) \
3863 ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
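/* Worked example (assuming GMP_NUMB_BITS == 64): __GMPF_BITS_TO_PREC(100)
   = (100 + 127)/64 = 3 limbs, i.e. the 2 limbs needed for 100 bits plus the
   extra limb; __GMPF_PREC_TO_BITS(3) = 3*64 - 64 = 128, the requested 100
   bits rounded up to whole limbs; and __GMPF_BITS_TO_PREC(128) = 3 again,
   so the prec->bits->prec round trip is indeed unchanged.  */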
3865 __GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
3868 /* Set n to the number of significant digits an mpf of the given _mp_prec
3869 field, in the given base. This is a rounded up value, designed to ensure
3870 there's enough digits to reproduce all the guaranteed part of the value.
3872 There are prec many limbs, but the high might be only "1" so forget it
3873 and just count prec-1 limbs into chars. +1 rounds that upwards, and a
3874 further +1 is because the limbs usually won't fall on digit boundaries.
3876 FIXME: If base is a power of 2 and the bits per digit divides
3877 GMP_LIMB_BITS then the +2 is unnecessary. This happens always for
3878 base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
3880 #define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
3881 do { \
3882 ASSERT (base >= 2 && base < numberof (mp_bases)); \
3883 (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS) \
3884 * mp_bases[(base)].chars_per_bit_exactly); \
3885 } while (0)
3888 /* Decimal point string, from the current C locale. Needs <langinfo.h> for
3889 nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
3890 DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
3891 their respective #if HAVE_FOO_H.
3893 GLIBC recommends nl_langinfo because getting only one facet can be
3894 faster, apparently. */
3896 /* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
3897 #if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
3898 #define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
3899 #endif
3900 /* RADIXCHAR is deprecated, still in unix98 or some such. */
3901 #if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
3902 #define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
3903 #endif
3904 /* localeconv is slower since it returns all locale stuff */
3905 #if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
3906 #define GMP_DECIMAL_POINT (localeconv()->decimal_point)
3907 #endif
3908 #if ! defined (GMP_DECIMAL_POINT)
3909 #define GMP_DECIMAL_POINT (".")
3910 #endif
3913 #define DOPRNT_CONV_FIXED 1
3914 #define DOPRNT_CONV_SCIENTIFIC 2
3915 #define DOPRNT_CONV_GENERAL 3
3917 #define DOPRNT_JUSTIFY_NONE 0
3918 #define DOPRNT_JUSTIFY_LEFT 1
3919 #define DOPRNT_JUSTIFY_RIGHT 2
3920 #define DOPRNT_JUSTIFY_INTERNAL 3
3922 #define DOPRNT_SHOWBASE_YES 1
3923 #define DOPRNT_SHOWBASE_NO 2
3924 #define DOPRNT_SHOWBASE_NONZERO 3
3926 struct doprnt_params_t {
3927 int base; /* negative for upper case */
3928 int conv; /* choices above */
3929 const char *expfmt; /* exponent format */
3930 int exptimes4; /* exponent multiply by 4 */
3931 char fill; /* character */
3932 int justify; /* choices above */
3933 int prec; /* prec field, or -1 for all digits */
3934 int showbase; /* choices above */
3935 int showpoint; /* if radix point always shown */
3936 int showtrailing; /* if trailing zeros wanted */
3937 char sign; /* '+', ' ', or '\0' */
3938 int width; /* width field */
3939 };
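/* Illustrative settings (an assumed mapping, for exposition only): a C-style
   "%+12.6f" conversion would correspond roughly to base = 10,
   conv = DOPRNT_CONV_FIXED, prec = 6, width = 12, sign = '+', fill = ' ',
   justify = DOPRNT_JUSTIFY_RIGHT and showbase = DOPRNT_SHOWBASE_NO.  */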
3941 #if _GMP_H_HAVE_VA_LIST
3943 __GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
3944 __GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
3945 __GMP_DECLSPEC typedef int (*doprnt_reps_t) __GMP_PROTO ((void *, int, int));
3946 __GMP_DECLSPEC typedef int (*doprnt_final_t) __GMP_PROTO ((void *));
3948 struct doprnt_funs_t {
3949 doprnt_format_t format;
3950 doprnt_memory_t memory;
3951 doprnt_reps_t reps;
3952 doprnt_final_t final; /* NULL if not required */
3953 };
3955 extern const struct doprnt_funs_t __gmp_fprintf_funs;
3956 extern const struct doprnt_funs_t __gmp_sprintf_funs;
3957 extern const struct doprnt_funs_t __gmp_snprintf_funs;
3958 extern const struct doprnt_funs_t __gmp_obstack_printf_funs;
3959 extern const struct doprnt_funs_t __gmp_ostream_funs;
3961 /* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first
3962 "size" of these have been written. "alloc > size" is maintained, so
3963 there's room to store a '\0' at the end. "result" is where the
3964 application wants the final block pointer. */
3965 struct gmp_asprintf_t {
3966 char **result;
3967 char *buf;
3968 size_t size;
3969 size_t alloc;
3970 };
3972 #define GMP_ASPRINTF_T_INIT(d, output) \
3973 do { \
3974 (d).result = (output); \
3975 (d).alloc = 256; \
3976 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
3977 (d).size = 0; \
3978 } while (0)
3980 /* If a realloc is necessary, use twice the size actually required, so as to
3981 avoid repeated small reallocs. */
3982 #define GMP_ASPRINTF_T_NEED(d, n) \
3983 do { \
3984 size_t alloc, newsize, newalloc; \
3985 ASSERT ((d)->alloc >= (d)->size + 1); \
3987 alloc = (d)->alloc; \
3988 newsize = (d)->size + (n); \
3989 if (alloc <= newsize) \
3990 { \
3991 newalloc = 2*newsize; \
3992 (d)->alloc = newalloc; \
3993 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
3994 alloc, newalloc, char); \
3995 } \
3996 } while (0)
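/* Illustrative append helper (a hypothetical sketch mirroring what
   __gmp_asprintf_memory must do, assuming <string.h> is available): make
   room for n more bytes, copy them in, and advance size.  The "alloc > size"
   invariant keeps one spare byte for the final '\0'.  */
#if 0
static void
demo_asprintf_append (struct gmp_asprintf_t *d, const char *p, size_t n)
{
  GMP_ASPRINTF_T_NEED (d, n);
  memcpy (d->buf + d->size, p, n);
  d->size += n;
}
#endif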
3998 __GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
3999 __GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
4000 __GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
4002 /* buf is where to write the next output, and size is how much space is left
4003 there. If the application passed size==0 then that's what we'll have
4004 here, and nothing at all should be written. */
4005 struct gmp_snprintf_t {
4006 char *buf;
4007 size_t size;
4008 };
4010 /* Add the bytes printed by the call to the total retval, or bail out on an
4011 error. */
4012 #define DOPRNT_ACCUMULATE(call) \
4013 do { \
4014 int __ret; \
4015 __ret = call; \
4016 if (__ret == -1) \
4017 goto error; \
4018 retval += __ret; \
4019 } while (0)
4020 #define DOPRNT_ACCUMULATE_FUN(fun, params) \
4021 do { \
4022 ASSERT ((fun) != NULL); \
4023 DOPRNT_ACCUMULATE ((*(fun)) params); \
4024 } while (0)
4026 #define DOPRNT_FORMAT(fmt, ap) \
4027 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
4028 #define DOPRNT_MEMORY(ptr, len) \
4029 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
4030 #define DOPRNT_REPS(c, n) \
4031 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
4033 #define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
4035 #define DOPRNT_REPS_MAYBE(c, n) \
4036 do { \
4037 if ((n) != 0) \
4038 DOPRNT_REPS (c, n); \
4039 } while (0)
4040 #define DOPRNT_MEMORY_MAYBE(ptr, len) \
4041 do { \
4042 if ((len) != 0) \
4043 DOPRNT_MEMORY (ptr, len); \
4044 } while (0)
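/* Illustrative caller shape (an assumed example): the DOPRNT_* macros expect
   "funs", "data" and a "retval" accumulator in scope, plus an "error:" label
   to bail out to, which is exactly the shape of __gmp_doprnt itself.
   Assumes <string.h> for strlen.  */
#if 0
static int
demo_emit_line (const struct doprnt_funs_t *funs, void *data, const char *s)
{
  int retval = 0;
  DOPRNT_STRING (s);            /* funs->memory (data, s, strlen(s)) */
  DOPRNT_REPS_MAYBE ('\n', 1);  /* funs->reps (data, '\n', 1) */
  return retval;
 error:
  return -1;
}
#endif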
4046 __GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
4047 __GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
4049 #define __gmp_doprnt_mpf __gmp_doprnt_mpf2
4050 __GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
4052 __GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
4053 #endif /* _GMP_H_HAVE_VA_LIST */
4056 typedef int (*gmp_doscan_scan_t) __GMP_PROTO ((void *, const char *, ...));
4057 typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
4058 typedef int (*gmp_doscan_get_t) __GMP_PROTO ((void *));
4059 typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
4061 struct gmp_doscan_funs_t {
4062 gmp_doscan_scan_t scan;
4063 gmp_doscan_step_t step;
4064 gmp_doscan_get_t get;
4065 gmp_doscan_unget_t unget;
4066 };
4067 extern const struct gmp_doscan_funs_t __gmp_fscanf_funs;
4068 extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
4070 #if _GMP_H_HAVE_VA_LIST
4071 __GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list));
4072 #endif
4075 /* For testing and debugging. */
4076 #define MPZ_CHECK_FORMAT(z) \
4077 do { \
4078 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
4079 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
4080 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
4081 } while (0)
4083 #define MPQ_CHECK_FORMAT(q) \
4084 do { \
4085 MPZ_CHECK_FORMAT (mpq_numref (q)); \
4086 MPZ_CHECK_FORMAT (mpq_denref (q)); \
4087 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
4089 if (SIZ(mpq_numref(q)) == 0) \
4090 { \
4091 /* should have zero as 0/1 */ \
4092 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
4093 && PTR(mpq_denref(q))[0] == 1); \
4094 } \
4095 else \
4096 { \
4097 /* should have no common factors */ \
4098 mpz_t g; \
4099 mpz_init (g); \
4100 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
4101 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
4102 mpz_clear (g); \
4103 } \
4104 } while (0)
4106 #define MPF_CHECK_FORMAT(f) \
4107 do { \
4108 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
4109 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
4110 if (SIZ(f) == 0) \
4111 ASSERT_ALWAYS (EXP(f) == 0); \
4112 if (SIZ(f) != 0) \
4113 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
4114 } while (0)
4117 #define MPZ_PROVOKE_REALLOC(z) \
4118 do { ALLOC(z) = ABSIZ(z); } while (0)
4121 /* Enhancement: The "mod" and "gcd_1" functions below could have
4122 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
4123 function pointers, only actual functions. It probably doesn't make much
4124 difference to the gmp code, since hopefully we arrange calls so there's
4125 no great need for the compiler to move things around. */
4127 #if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
4128 /* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
4129 in mpn/x86/x86-defs.m4. Be sure to update that when changing here. */
4130 struct cpuvec_t {
4131 DECL_add_n ((*add_n));
4132 DECL_addmul_1 ((*addmul_1));
4133 DECL_copyd ((*copyd));
4134 DECL_copyi ((*copyi));
4135 DECL_divexact_1 ((*divexact_1));
4136 DECL_divexact_by3c ((*divexact_by3c));
4137 DECL_divrem_1 ((*divrem_1));
4138 DECL_gcd_1 ((*gcd_1));
4139 DECL_lshift ((*lshift));
4140 DECL_mod_1 ((*mod_1));
4141 DECL_mod_34lsub1 ((*mod_34lsub1));
4142 DECL_modexact_1c_odd ((*modexact_1c_odd));
4143 DECL_mul_1 ((*mul_1));
4144 DECL_mul_basecase ((*mul_basecase));
4145 DECL_preinv_divrem_1 ((*preinv_divrem_1));
4146 DECL_preinv_mod_1 ((*preinv_mod_1));
4147 DECL_rshift ((*rshift));
4148 DECL_sqr_basecase ((*sqr_basecase));
4149 DECL_sub_n ((*sub_n));
4150 DECL_submul_1 ((*submul_1));
4151 int initialized;
4152 mp_size_t mul_toom22_threshold;
4153 mp_size_t mul_toom33_threshold;
4154 mp_size_t sqr_toom2_threshold;
4155 mp_size_t sqr_toom3_threshold;
4156 };
4157 __GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
4158 #endif /* x86 fat binary */
4160 __GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void));
4162 /* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
4163 if that hasn't yet been done (to establish the right values). */
4164 #define CPUVEC_THRESHOLD(field) \
4165 ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
4166 __gmpn_cpuvec.field)
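/* Illustrative use (a hypothetical helper): code compiled into a fat binary
   reads its cutoffs through CPUVEC_THRESHOLD, so the values tuned for the
   CPU detected at runtime are the ones actually used.  */
#if 0
static int
demo_below_toom22 (mp_size_t n)
{
  return n < CPUVEC_THRESHOLD (mul_toom22_threshold);
}
#endif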
4169 #if HAVE_NATIVE_mpn_add_nc
4170 #define mpn_add_nc __MPN(add_nc)
4171 __GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4172 #else
4174 static inline mp_limb_t
4175 mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4176 {
4177 mp_limb_t co;
4178 co = mpn_add_n (rp, up, vp, n);
4179 co += mpn_add_1 (rp, rp, n, ci);
4180 return co;
4181 }
4182 #endif
4184 #if HAVE_NATIVE_mpn_sub_nc
4185 #define mpn_sub_nc __MPN(sub_nc)
4186 __GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
4187 #else
4188 static inline mp_limb_t
4189 mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4190 {
4191 mp_limb_t co;
4192 co = mpn_sub_n (rp, up, vp, n);
4193 co += mpn_sub_1 (rp, rp, n, ci);
4194 return co;
4195 }
4196 #endif
4198 static inline int
4199 mpn_zero_p (mp_srcptr ap, mp_size_t n)
4200 {
4201 mp_size_t i;
4202 for (i = n - 1; i >= 0; i--)
4203 {
4204 if (ap[i] != 0)
4205 return 0;
4206 }
4207 return 1;
4208 }
4210 #if TUNE_PROGRAM_BUILD
4211 /* Some extras wanted when recompiling some .c files for use by the tune
4212 program. Not part of a normal build.
4214 It's necessary to keep these thresholds as #defines (just to an
4215 identically named variable), since various defaults are established based
4216 on #ifdef in the .c files. For some this is not so (the defaults are
4217 instead established above), but all are done this way for consistency. */
4219 #undef MUL_TOOM22_THRESHOLD
4220 #define MUL_TOOM22_THRESHOLD mul_toom22_threshold
4221 extern mp_size_t mul_toom22_threshold;
4223 #undef MUL_TOOM33_THRESHOLD
4224 #define MUL_TOOM33_THRESHOLD mul_toom33_threshold
4225 extern mp_size_t mul_toom33_threshold;
4227 #undef MUL_TOOM44_THRESHOLD
4228 #define MUL_TOOM44_THRESHOLD mul_toom44_threshold
4229 extern mp_size_t mul_toom44_threshold;
4231 #undef MUL_TOOM6H_THRESHOLD
4232 #define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
4233 extern mp_size_t mul_toom6h_threshold;
4235 #undef MUL_TOOM8H_THRESHOLD
4236 #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
4237 extern mp_size_t mul_toom8h_threshold;
4239 #undef MUL_TOOM32_TO_TOOM43_THRESHOLD
4240 #define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
4241 extern mp_size_t mul_toom32_to_toom43_threshold;
4243 #undef MUL_TOOM32_TO_TOOM53_THRESHOLD
4244 #define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
4245 extern mp_size_t mul_toom32_to_toom53_threshold;
4247 #undef MUL_TOOM42_TO_TOOM53_THRESHOLD
4248 #define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
4249 extern mp_size_t mul_toom42_to_toom53_threshold;
4251 #undef MUL_TOOM42_TO_TOOM63_THRESHOLD
4252 #define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
4253 extern mp_size_t mul_toom42_to_toom63_threshold;
4255 #undef MUL_FFT_THRESHOLD
4256 #define MUL_FFT_THRESHOLD mul_fft_threshold
4257 extern mp_size_t mul_fft_threshold;
4259 #undef MUL_FFT_MODF_THRESHOLD
4260 #define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
4261 extern mp_size_t mul_fft_modf_threshold;
4263 #undef MUL_FFT_TABLE
4264 #define MUL_FFT_TABLE { 0 }
4266 #undef MUL_FFT_TABLE3
4267 #define MUL_FFT_TABLE3 { {0,0} }
4269 /* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4270 remain as zero (always use it). */
4271 #if ! HAVE_NATIVE_mpn_sqr_basecase
4272 #undef SQR_BASECASE_THRESHOLD
4273 #define SQR_BASECASE_THRESHOLD sqr_basecase_threshold
4274 extern mp_size_t sqr_basecase_threshold;
4275 #endif
4277 #if TUNE_PROGRAM_BUILD_SQR
4278 #undef SQR_TOOM2_THRESHOLD
4279 #define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC
4280 #else
4281 #undef SQR_TOOM2_THRESHOLD
4282 #define SQR_TOOM2_THRESHOLD sqr_toom2_threshold
4283 extern mp_size_t sqr_toom2_threshold;
4284 #endif
4286 #undef SQR_TOOM3_THRESHOLD
4287 #define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
4288 extern mp_size_t sqr_toom3_threshold;
4290 #undef SQR_TOOM4_THRESHOLD
4291 #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
4292 extern mp_size_t sqr_toom4_threshold;
4294 #undef SQR_TOOM6_THRESHOLD
4295 #define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
4296 extern mp_size_t sqr_toom6_threshold;
4298 #undef SQR_TOOM8_THRESHOLD
4299 #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
4300 extern mp_size_t sqr_toom8_threshold;
4302 #undef SQR_FFT_THRESHOLD
4303 #define SQR_FFT_THRESHOLD sqr_fft_threshold
4304 extern mp_size_t sqr_fft_threshold;
4306 #undef SQR_FFT_MODF_THRESHOLD
4307 #define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
4308 extern mp_size_t sqr_fft_modf_threshold;
4310 #undef SQR_FFT_TABLE
4311 #define SQR_FFT_TABLE { 0 }
4313 #undef SQR_FFT_TABLE3
4314 #define SQR_FFT_TABLE3 { {0,0} }
4316 #undef MULLO_BASECASE_THRESHOLD
4317 #define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
4318 extern mp_size_t mullo_basecase_threshold;
4320 #undef MULLO_DC_THRESHOLD
4321 #define MULLO_DC_THRESHOLD mullo_dc_threshold
4322 extern mp_size_t mullo_dc_threshold;
4324 #undef MULLO_MUL_N_THRESHOLD
4325 #define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
4326 extern mp_size_t mullo_mul_n_threshold;
4328 #undef DC_DIV_QR_THRESHOLD
4329 #define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
4330 extern mp_size_t dc_div_qr_threshold;
4332 #undef DC_DIVAPPR_Q_THRESHOLD
4333 #define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
4334 extern mp_size_t dc_divappr_q_threshold;
4336 #undef DC_BDIV_Q_THRESHOLD
4337 #define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
4338 extern mp_size_t dc_bdiv_q_threshold;
4340 #undef DC_BDIV_QR_THRESHOLD
4341 #define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
4342 extern mp_size_t dc_bdiv_qr_threshold;
4344 #undef MU_DIV_QR_THRESHOLD
4345 #define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
4346 extern mp_size_t mu_div_qr_threshold;
4348 #undef MU_DIVAPPR_Q_THRESHOLD
4349 #define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
4350 extern mp_size_t mu_divappr_q_threshold;
4352 #undef MUPI_DIV_QR_THRESHOLD
4353 #define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
4354 extern mp_size_t mupi_div_qr_threshold;
4356 #undef MU_BDIV_QR_THRESHOLD
4357 #define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
4358 extern mp_size_t mu_bdiv_qr_threshold;
4360 #undef MU_BDIV_Q_THRESHOLD
4361 #define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
4362 extern mp_size_t mu_bdiv_q_threshold;
4364 #undef INV_MULMOD_BNM1_THRESHOLD
4365 #define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
4366 extern mp_size_t inv_mulmod_bnm1_threshold;
4368 #undef INV_NEWTON_THRESHOLD
4369 #define INV_NEWTON_THRESHOLD inv_newton_threshold
4370 extern mp_size_t inv_newton_threshold;
4372 #undef INV_APPR_THRESHOLD
4373 #define INV_APPR_THRESHOLD inv_appr_threshold
4374 extern mp_size_t inv_appr_threshold;
4376 #undef BINV_NEWTON_THRESHOLD
4377 #define BINV_NEWTON_THRESHOLD binv_newton_threshold
4378 extern mp_size_t binv_newton_threshold;
4380 #undef REDC_1_TO_REDC_2_THRESHOLD
4381 #define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
4382 extern mp_size_t redc_1_to_redc_2_threshold;
4384 #undef REDC_2_TO_REDC_N_THRESHOLD
4385 #define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
4386 extern mp_size_t redc_2_to_redc_n_threshold;
4388 #undef REDC_1_TO_REDC_N_THRESHOLD
4389 #define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
4390 extern mp_size_t redc_1_to_redc_n_threshold;
4392 #undef MATRIX22_STRASSEN_THRESHOLD
4393 #define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
4394 extern mp_size_t matrix22_strassen_threshold;
4396 #undef HGCD_THRESHOLD
4397 #define HGCD_THRESHOLD hgcd_threshold
4398 extern mp_size_t hgcd_threshold;
4400 #undef GCD_DC_THRESHOLD
4401 #define GCD_DC_THRESHOLD gcd_dc_threshold
4402 extern mp_size_t gcd_dc_threshold;
4404 #undef GCDEXT_DC_THRESHOLD
4405 #define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
4406 extern mp_size_t gcdext_dc_threshold;
4408 #undef DIVREM_1_NORM_THRESHOLD
4409 #define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
4410 extern mp_size_t divrem_1_norm_threshold;
4412 #undef DIVREM_1_UNNORM_THRESHOLD
4413 #define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
4414 extern mp_size_t divrem_1_unnorm_threshold;
4416 #undef MOD_1_NORM_THRESHOLD
4417 #define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
4418 extern mp_size_t mod_1_norm_threshold;
4420 #undef MOD_1_UNNORM_THRESHOLD
4421 #define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
4422 extern mp_size_t mod_1_unnorm_threshold;
4424 #undef MOD_1N_TO_MOD_1_1_THRESHOLD
4425 #define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
4426 extern mp_size_t mod_1n_to_mod_1_1_threshold;
4428 #undef MOD_1U_TO_MOD_1_1_THRESHOLD
4429 #define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
4430 extern mp_size_t mod_1u_to_mod_1_1_threshold;
4432 #undef MOD_1_1_TO_MOD_1_2_THRESHOLD
4433 #define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
4434 extern mp_size_t mod_1_1_to_mod_1_2_threshold;
4436 #undef MOD_1_2_TO_MOD_1_4_THRESHOLD
4437 #define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
4438 extern mp_size_t mod_1_2_to_mod_1_4_threshold;
4440 #undef PREINV_MOD_1_TO_MOD_1_THRESHOLD
4441 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
4442 extern mp_size_t preinv_mod_1_to_mod_1_threshold;
4444 #if ! UDIV_PREINV_ALWAYS
4445 #undef DIVREM_2_THRESHOLD
4446 #define DIVREM_2_THRESHOLD divrem_2_threshold
4447 extern mp_size_t divrem_2_threshold;
4448 #endif
4450 #undef MULMOD_BNM1_THRESHOLD
4451 #define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
4452 extern mp_size_t mulmod_bnm1_threshold;
4454 #undef SQRMOD_BNM1_THRESHOLD
4455 #define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
4456 extern mp_size_t sqrmod_bnm1_threshold;
4458 #undef GET_STR_DC_THRESHOLD
4459 #define GET_STR_DC_THRESHOLD get_str_dc_threshold
4460 extern mp_size_t get_str_dc_threshold;
4462 #undef GET_STR_PRECOMPUTE_THRESHOLD
4463 #define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
4464 extern mp_size_t get_str_precompute_threshold;
4466 #undef SET_STR_DC_THRESHOLD
4467 #define SET_STR_DC_THRESHOLD set_str_dc_threshold
4468 extern mp_size_t set_str_dc_threshold;
4470 #undef SET_STR_PRECOMPUTE_THRESHOLD
4471 #define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
4472 extern mp_size_t set_str_precompute_threshold;
4474 #undef FFT_TABLE_ATTRS
4475 #define FFT_TABLE_ATTRS
4476 extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
4477 #define FFT_TABLE3_SIZE 2000 /* generous space for tuning */
4478 extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
4480 /* Sizes the tune program tests up to, used in a couple of recompilations. */
4481 #undef MUL_TOOM22_THRESHOLD_LIMIT
4482 #undef MUL_TOOM33_THRESHOLD_LIMIT
4483 #undef MULLO_BASECASE_THRESHOLD_LIMIT
4484 #undef SQR_TOOM3_THRESHOLD_LIMIT
4485 #define SQR_TOOM2_MAX_GENERIC 200
4486 #define MUL_TOOM22_THRESHOLD_LIMIT 700
4487 #define MUL_TOOM33_THRESHOLD_LIMIT 700
4488 #define SQR_TOOM3_THRESHOLD_LIMIT 400
4489 #define MUL_TOOM44_THRESHOLD_LIMIT 1000
4490 #define SQR_TOOM4_THRESHOLD_LIMIT 1000
4491 #define MUL_TOOM6H_THRESHOLD_LIMIT 1100
4492 #define SQR_TOOM6_THRESHOLD_LIMIT 1100
4493 #define MUL_TOOM8H_THRESHOLD_LIMIT 1200
4494 #define SQR_TOOM8_THRESHOLD_LIMIT 1200
4495 #define MULLO_BASECASE_THRESHOLD_LIMIT 200
4496 #define GET_STR_THRESHOLD_LIMIT 150
4498 #endif /* TUNE_PROGRAM_BUILD */
4500 #if defined (__cplusplus)
4501 }
4502 #endif
4504 /* FIXME: Make these itch functions less conservative. Also consider making
4505 them dependent on just 'an', and compute the allocation directly from 'an'
4506 instead of via n. */
4508 /* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
4509 k is the smallest k such that
4510 ceil(an/2^k) < MUL_TOOM22_THRESHOLD.
4511 which implies that
4512 k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
4513 = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
4514 */
4515 #define mpn_toom22_mul_itch(an, bn) \
4516 (2 * ((an) + GMP_NUMB_BITS))
4517 #define mpn_toom2_sqr_itch(an) \
4518 (2 * ((an) + GMP_NUMB_BITS))
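/* Worked example (assuming GMP_NUMB_BITS == 64 and MUL_TOOM22_THRESHOLD ==
   30): for an = 1000, floor(999/29) = 34, whose bitsize is 6, so the exact
   need is 2*(1000 + 6) = 2012 limbs; the macro's conservative bound allows
   2*(1000 + 64) = 2128, trading a few limbs for a formula with no log.  */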
4520 /* Can probably be trimmed to 2 an + O(log an). */
4521 #define mpn_toom33_mul_itch(an, bn) \
4522 ((5 * (an) >> 1) + GMP_NUMB_BITS)
4523 #define mpn_toom3_sqr_itch(an) \
4524 ((5 * (an) >> 1) + GMP_NUMB_BITS)
4526 #define mpn_toom44_mul_itch(an, bn) \
4527 (3 * (an) + GMP_NUMB_BITS)
4528 #define mpn_toom4_sqr_itch(an) \
4529 (3 * (an) + GMP_NUMB_BITS)
4531 #define mpn_toom6_sqr_itch(n) \
4532 ( ((n) - SQR_TOOM6_THRESHOLD)*2 + \
4533 MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
4534 mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) )
4536 #define mpn_toom6_mul_n_itch(n) \
4537 ( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
4538 MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
4539 mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
4541 static inline mp_size_t
4542 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
4543 mp_size_t estimatedN;
4544 estimatedN = (an + bn) / (size_t) 10 + 1;
4545 return mpn_toom6_mul_n_itch (estimatedN * 6);
4546 }
4548 #define mpn_toom8_sqr_itch(n) \
4549 ( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
4550 MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
4551 mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) )
4553 #define mpn_toom8_mul_n_itch(n) \
4554 ( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) + \
4555 MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
4556 mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) )
4558 static inline mp_size_t
4559 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
4560 mp_size_t estimatedN;
4561 estimatedN = (an + bn) / (size_t) 14 + 1;
4562 return mpn_toom8_mul_n_itch (estimatedN * 8);
4563 }
4565 static inline mp_size_t
4566 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
4567 {
4568 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
4569 mp_size_t itch = 2 * n + 1;
4571 return itch;
4572 }
4574 static inline mp_size_t
4575 mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
4576 {
4577 mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
4578 return 6 * n + 3;
4579 }
4581 static inline mp_size_t
4582 mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
4583 {
4584 mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
4586 return 6*n + 4;
4587 }
4589 static inline mp_size_t
4590 mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
4591 {
4592 mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
4593 return 6*n + 4;
4594 }
4596 static inline mp_size_t
4597 mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
4598 {
4599 mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
4600 return 10 * n + 10;
4601 }
4603 static inline mp_size_t
4604 mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
4605 {
4606 mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
4607 return 10 * n + 10;
4608 }
4610 static inline mp_size_t
4611 mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
4612 {
4613 mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
4614 return 9 * n + 3;
4615 }
4617 #if WANT_FFT
4618 #define mpn_fft_mul mpn_mul_fft_full
4619 #else
4620 #define mpn_fft_mul mpn_nussbaumer_mul
4621 #endif
4625 /* A little helper for a null-terminated __gmp_allocate_func string.
4626 The destructor ensures it's freed even if an exception is thrown.
4627 The len field is needed by the destructor, and can be used by anyone else
4628 to avoid a second strlen pass over the data.
4630 Since our input is a C string, using strlen is correct. Perhaps it'd be
4631 more C++-ish style to use std::char_traits<char>::length, but char_traits
4632 isn't available in gcc 2.95.4. */
4634 class gmp_allocated_string {
4635 public:
4636 char *str;
4637 size_t len;
4638 gmp_allocated_string(char *arg)
4639 {
4640 str = arg;
4641 len = std::strlen (str);
4642 }
4643 ~gmp_allocated_string()
4644 {
4645 (*__gmp_free_func) (str, len+1);
4646 }
4647 };
4649 std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
4650 int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
4651 void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
4652 void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
4653 std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
4654 extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
4656 #endif /* __cplusplus */
4658 #endif /* __GMP_IMPL_H__ */