1 /* Include file for internal GNU MP types and definitions.
3 THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
6 Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.
10 This file is part of the GNU MP Library.
12 The GNU MP Library is free software; you can redistribute it and/or modify
13 it under the terms of the GNU Lesser General Public License as published by
14 the Free Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
17 The GNU MP Library is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
20 License for more details.
22 You should have received a copy of the GNU Lesser General Public License
23 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
26 /* __GMP_DECLSPEC must be given on any global data that will be accessed
27 from outside libgmp, meaning from the test or development programs, or
28 from libgmpxx. Failing to do this will result in an incorrect address
29 being used for the accesses. On functions __GMP_DECLSPEC makes calls
from outside libgmp more efficient, but they'll still work fine without
it.  */
34 #ifndef __GMP_IMPL_H__
35 #define __GMP_IMPL_H__
38 #include <intrinsics.h> /* for _popcnt */
41 /* limits.h is not used in general, since it's an ANSI-ism, and since on
42 solaris gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
43 values (the ABI=64 values).
45 On Cray vector systems, however, we need the system limits.h since sizes
46 of signed and unsigned types can differ there, depending on compiler
47 options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail. For
48 reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
49 short can be 24, 32, 46 or 64 bits, and different for ushort. */
55 /* For fat.h and other fat binary stuff.
56 No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
57 declared this way are only used to set function pointers in __gmpn_cpuvec,
58 they're not called directly. */
/* Each DECL_xxx(name) expands to a prototype for one cpuvec function slot.
   Entries whose signature matches an earlier one are defined as aliases of
   that earlier DECL (the continuation lines restoring those aliases had been
   lost, leaving dangling backslashes that swallowed the next #define).  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_addlsh1_n(name) \
  DECL_add_n (name)
#define DECL_addlsh2_n(name) \
  DECL_add_n (name)
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_addmul_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_bdiv_dbm1c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_com(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
#define DECL_lshiftc(name) \
  DECL_lshift (name)
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_mod_1_1p(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [])
#define DECL_mod_1_1p_cps(name) \
  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
#define DECL_mod_1s_2p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_2p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_1s_4p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_4p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
#define DECL_mullo_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_redc_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_redc_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_sublsh1_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)
132 #if ! defined (__GMP_WITHIN_CONFIGURE)
134 #include "gmp-mparam.h"
135 #include "fib_table.h"
136 #include "fac_table.h"
137 #include "mp_bases.h"
143 #if HAVE_INTTYPES_H /* for uint_least32_t */
144 # include <inttypes.h>
152 #include <cstring> /* for strlen */
153 #include <string> /* for std::string */
/* Default the debug temporary-allocator off so TMP_ALLOC_LIMBS_2 and others
   can test it unconditionally (the closing #endif had been lost).  */
#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif
161 /* The following tries to get a good version of alloca. The tests are
162 adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
163 Whether this succeeds is tested by GMP_FUNC_ALLOCA and HAVE_ALLOCA will
164 be setup appropriately.
166 ifndef alloca - a cpp define might already exist.
167 glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
168 HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.
170 GCC __builtin_alloca - preferred whenever available.
172 _AIX pragma - IBM compilers need a #pragma in "each module that needs to
173 use alloca". Pragma indented to protect pre-ANSI cpp's. _IBMR2 was
174 used in past versions of GMP, retained still in case it matters.
176 The autoconf manual says this pragma needs to be at the start of a C
177 file, apart from comments and preprocessor directives. Is that true?
xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
though.  */
/* Pick an alloca, preferring GCC's builtin; the nested #ifdef ladder around
   the surviving fragments had been lost and is restored here -- confirm the
   exact branch order against upstream gmp-impl.h.  */
#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
       char *alloca ();
#     endif
#    endif
#   endif
#  endif
# endif
#endif
/* if not provided by gmp-mparam.h */
#ifndef BYTES_PER_MP_LIMB
#define BYTES_PER_MP_LIMB  SIZEOF_MP_LIMB_T
#endif
/* New name for the limb byte count; keeps old spelling available.  */
#define GMP_LIMB_BYTES  BYTES_PER_MP_LIMB
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)
/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits.
   The #else/#endif lines of this fallback chain had been lost, leaving
   four conflicting typedefs all active.  */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif
/* gmp_intptr_t, for pointer to integer casts.  Use intptr_t when the
   system provides it, otherwise fall back on size_t (the conditional
   around the two typedefs had been lost).  */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif
/* pre-inverse types for truncating division and modulo */
/* One-limb precomputed divisor inverse; inv32 apparently holds the
   reciprocal limb -- confirm against the div_qr routines.  */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
/* Two-limb divisor variant with three precomputed pieces (inv21, inv32,
   inv53); naming presumably reflects the 2/1, 3/2 and 5/3 division steps
   -- TODO confirm.  */
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
249 /* "const" basically means a function does nothing but examine its arguments
250 and give a return value, it doesn't read or write any memory (neither
251 global nor pointed to by arguments), and has no other side-effects. This
252 is more restrictive than "pure". See info node "(gcc)Function
253 Attributes". __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
254 this off when trying to write timing loops. */
255 #if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
256 #define ATTRIBUTE_CONST __attribute__ ((const))
258 #define ATTRIBUTE_CONST
261 #if HAVE_ATTRIBUTE_NORETURN
262 #define ATTRIBUTE_NORETURN __attribute__ ((noreturn))
264 #define ATTRIBUTE_NORETURN
267 /* "malloc" means a function behaves like malloc in that the pointer it
268 returns doesn't alias anything. */
269 #if HAVE_ATTRIBUTE_MALLOC
270 #define ATTRIBUTE_MALLOC __attribute__ ((malloc))
272 #define ATTRIBUTE_MALLOC
/* Map strchr to the pre-ANSI index() only on systems lacking strchr
   (the #if guard had been lost, which would clobber strchr everywhere).  */
#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

/* Byte-loop memset fallback for systems without one; the do/while wrapper
   and the declaration of __i had been lost.  Upstream also ASSERTs n>=0
   here -- confirm.  */
#if ! HAVE_MEMSET
#define memset(p, c, n) \
  do { \
    char *__memset__p = (p); \
    mp_size_t __i; \
    for (__i = 0; __i < (n); __i++) \
      __memset__p[__i] = (c); \
  } while (0)
#endif
291 /* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
292 mode. Falling back to a memcpy will give maximum portability, since it
293 works no matter whether va_list is a pointer, struct or array. */
294 #if ! defined (va_copy) && defined (__va_copy)
295 #define va_copy(dst,src) __va_copy(dst,src)
297 #if ! defined (va_copy)
298 #define va_copy(dst,src) \
299 do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
303 /* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
304 (ie. ctlz, ctpop, cttz). */
305 #if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68 \
306 || HAVE_HOST_CPU_alphaev7
307 #define HAVE_HOST_CPU_alpha_CIX 1
#if defined (__cplusplus)  /* NOTE(review): the matching `extern "C" {` and its #endif appear to have been lost here -- confirm against upstream gmp-impl.h */
/* Usage: TMP_DECL;
	  TMP_MARK;
	  ptr = TMP_ALLOC (bytes);
	  ...
	  TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */
/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  Currently TMP_ALLOC
   isn't used for "double"s, so that's not in the union. */
/* NOTE(review): the definition of union tmp_align_t appears to have been
   lost here (per the comment above it should hold a limb and a pointer
   member) -- restore from upstream gmp-impl.h; sizeof below needs it.  */
#define __TMP_ALIGN sizeof (union tmp_align_t)
/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop. */
/* e.g. ROUND_UP_MULTIPLE(10u,8) == 16 and ROUND_UP_MULTIPLE(16u,8) == 16.
   A signed "a" would make (-(a))%(m) negative, hence the unsigned rule. */
#define ROUND_UP_MULTIPLE(a,m) \
(POW2_P(m) ? (a) + (-(a))%(m) \
: (a)+(m)-1 - (((a)+(m)-1) % (m)))
/* Shared allocator record for the alloca and reentrant TMP methods.  The
   struct close, the #endif, and the #if WANT_TMP_ALLOCA section skeleton
   had been lost and are restored here.  */
#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif

#if WANT_TMP_ALLOCA
/* Small blocks straight off the stack, big ones from the heap; the small
   variants need no bookkeeping at all, hence the empty S-macros.  */
#define TMP_SDECL
#define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK		__tmp_marker = 0
#define TMP_SALLOC(n)		alloca(n)
#define TMP_BALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_ALLOC(n) \
  (LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE \
  do { \
    if (UNLIKELY (__tmp_marker != 0)) \
      __gmp_tmp_reentrant_free (__tmp_marker); \
  } while (0)
#endif
/* Fully reentrant variant: every allocation goes to the heap via
   __gmp_tmp_reentrant_alloc.  The closing #endif had been lost.  */
#if WANT_TMP_REENTRANT
#define TMP_SDECL		TMP_DECL
#define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK		TMP_MARK
#define TMP_MARK		__tmp_marker = 0
#define TMP_SALLOC(n)		TMP_ALLOC(n)
#define TMP_BALLOC(n)		TMP_ALLOC(n)
#define TMP_ALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE		TMP_FREE
#define TMP_FREE		__gmp_tmp_reentrant_free (__tmp_marker)
#endif
/* Non-reentrant variant: allocations come from a global chunked stack.
   The struct tmp_marker header/close and the final #endif had been lost.  */
#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL		TMP_DECL
#define TMP_DECL		struct tmp_marker __tmp_marker
#define TMP_SMARK		TMP_MARK
#define TMP_MARK		__gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)		TMP_ALLOC(n)
#define TMP_BALLOC(n)		TMP_ALLOC(n)
#define TMP_ALLOC(n) \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE		TMP_FREE
#define TMP_FREE		__gmp_tmp_free (&__tmp_marker)
#endif
/* Debug variant: every TMP operation carries file/line and a named marker
   so tal-debug.c can detect misuse.  The opening #if, both struct bodies
   and the do/while wrappers had been lost; field layout of the structs is
   restored from upstream -- confirm against tal-debug.c.  */
#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  void                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
                                          struct tmp_debug_t *,
                                          const char *, const char *);
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
                                            struct tmp_debug_t **, const char *,
                                            size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_debug_free (const char *, int, int,
                                          struct tmp_debug_t **,
                                          const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too. */
#define TMP_DECL_NAME(marker, marker_name) \
  int __tmp_marker_inscope; \
  const char *__tmp_marker_name = marker_name; \
  struct tmp_debug_t __tmp_marker_struct; \
  /* don't demand NULL, just cast a zero */ \
  struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name) \
  do { \
    __tmp_marker_inscope = 1; \
    __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
                          &__tmp_marker, &__tmp_marker_struct, \
                          __tmp_marker_name, marker_name); \
  } while (0)
#define TMP_SALLOC(n)		TMP_ALLOC(n)
#define TMP_BALLOC(n)		TMP_ALLOC(n)
#define TMP_ALLOC(size) \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
                         __tmp_marker_inscope, \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name) \
  do { \
    __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
                          marker, &__tmp_marker, \
                          __tmp_marker_name, marker_name); \
  } while (0)
#endif /* WANT_TMP_DEBUG */
/* Allocating various types. */
/* Counts are element counts, not bytes; each wrapper multiplies by the
   element size and casts the result.  */
#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
/* Limb vectors. */
#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n) TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n) TMP_BALLOC_TYPE(n,mp_limb_t)
/* Arrays of limb pointers. */
#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n) TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n) TMP_BALLOC_TYPE(n,mp_ptr)
/* It's more efficient to allocate one block than two.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.
   (The do/while and if/else skeleton of this macro had been lost.)  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
  do { \
    if (WANT_TMP_DEBUG) \
      { \
        (xp) = TMP_ALLOC_LIMBS (xsize); \
        (yp) = TMP_ALLOC_LIMBS (ysize); \
      } \
    else \
      { \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
        (yp) = (xp) + (xsize); \
      } \
  } while (0)
/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
/* Absolute value cast to unsigned type T without overflowing on the most
   negative value (negate (x)+1 in T, then subtract the 1 back).  */
#define ABS_CAST(T,x) ((x) >= 0 ? (T)(x) : -((T)((x) + 1) - 1))
/* System headers may predefine MIN/MAX; take ours unconditionally.  */
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x) (sizeof (x) / sizeof ((x)[0]))
/* Field access macros. */
#define SIZ(x) ((x)->_mp_size)      /* signed size field (sign carries the number's sign, per ABSIZ below) */
#define ABSIZ(x) ABS (SIZ (x))      /* magnitude of the size field */
#define PTR(x) ((x)->_mp_d)         /* limb data pointer */
#define EXP(x) ((x)->_mp_exp)       /* _mp_exp field (floats) */
#define PREC(x) ((x)->_mp_prec)     /* _mp_prec field (floats) */
#define ALLOC(x) ((x)->_mp_alloc)   /* allocated capacity field */
#define NUM(x) mpq_numref(x)        /* rational numerator */
#define DEN(x) mpq_denref(x)        /* rational denominator */
/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that. */
#define POW2_P(n) (((n) & ((n) - 1)) == 0)

/* This is intended for constant THRESHOLDs only, where the compiler
   can completely fold the result.  Yields the bit width of n up to 16 bits,
   i.e. floor(log2(n))+1 for n>=1.  (The "#define LOG2C(n)" header line of
   this macro had been lost.)  */
#define LOG2C(n) \
 (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
  ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
  ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
/* The "short" defines are a bit different because shorts are promoted to
   ints by operators such as "~" and "<<", so their results need casting
   back to short.

   #ifndef's are used since on some systems (HP?) header files other than
   limits.h setup these defines.  We could forcibly #undef in that case, but
   there seems no need to worry about that.
   (The #ifndef/#endif guards themselves had been lost below.)  */
#ifndef ULONG_MAX
#define ULONG_MAX   __GMP_ULONG_MAX
#endif
#ifndef UINT_MAX
#define UINT_MAX    __GMP_UINT_MAX
#endif
#ifndef USHRT_MAX
#define USHRT_MAX   __GMP_USHRT_MAX
#endif
#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed. */
#define ULONG_HIGHBIT (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT ((unsigned short) (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1)))
#define GMP_LIMB_HIGHBIT (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))
/* Provide the signed limits when <limits.h> didn't, deriving each from the
   corresponding unsigned high bit (the #ifndef guards and the #else of the
   MP_SIZE_T conditional had been lost).  */
#ifndef LONG_MIN
#define LONG_MIN           ((long) ULONG_HIGHBIT)
#endif
#ifndef LONG_MAX
#define LONG_MAX           (-(LONG_MIN+1))
#endif

#ifndef INT_MIN
#define INT_MIN            ((int) UINT_HIGHBIT)
#endif
#ifndef INT_MAX
#define INT_MAX            (-(INT_MIN+1))
#endif

#ifndef SHRT_MIN
#define SHRT_MIN           ((short) USHRT_HIGHBIT)
#endif
#ifndef SHRT_MAX
#define SHRT_MAX           ((short) (-(SHRT_MIN+1)))
#endif

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN
603 #define GMP_NUMB_HIGHBIT (CNST_LIMB(1) << (GMP_NUMB_BITS-1))
605 #if GMP_NAIL_BITS == 0
606 #define GMP_NAIL_LOWBIT CNST_LIMB(0)
608 #define GMP_NAIL_LOWBIT (CNST_LIMB(1) << GMP_NUMB_BITS)
611 #if GMP_NAIL_BITS != 0
612 /* Set various *_THRESHOLD values to be used for nails. Thus we avoid using
613 code that has not yet been qualified. */
615 #undef DC_DIV_QR_THRESHOLD
616 #define DC_DIV_QR_THRESHOLD 50
618 #undef DIVREM_1_NORM_THRESHOLD
619 #undef DIVREM_1_UNNORM_THRESHOLD
620 #undef MOD_1_NORM_THRESHOLD
621 #undef MOD_1_UNNORM_THRESHOLD
622 #undef USE_PREINV_DIVREM_1
623 #undef DIVREM_2_THRESHOLD
624 #undef DIVEXACT_1_THRESHOLD
625 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
626 #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
627 #define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
628 #define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* no preinv */
629 #define USE_PREINV_DIVREM_1 0 /* no preinv */
630 #define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* no preinv */
632 /* mpn/generic/mul_fft.c is not nails-capable. */
633 #undef MUL_FFT_THRESHOLD
634 #undef SQR_FFT_THRESHOLD
635 #define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
636 #define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
/* Exchange two values of the named type.  Each macro had lost its
   do/while wrapper and the "(x) = (y);" line, restored here.  */
#define MP_LIMB_T_SWAP(x, y) \
  do { \
    mp_limb_t __mp_limb_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_limb_t_swap__tmp; \
  } while (0)
#define MP_SIZE_T_SWAP(x, y) \
  do { \
    mp_size_t __mp_size_t_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_size_t_swap__tmp; \
  } while (0)

#define MP_PTR_SWAP(x, y) \
  do { \
    mp_ptr __mp_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_ptr_swap__tmp; \
  } while (0)
#define MP_SRCPTR_SWAP(x, y) \
  do { \
    mp_srcptr __mp_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mp_srcptr_swap__tmp; \
  } while (0)

/* Swap a pointer/size pair describing an mpn operand.  */
#define MPN_PTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_PTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while (0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
  do { \
    MP_SRCPTR_SWAP (xp, yp); \
    MP_SIZE_T_SWAP (xs, ys); \
  } while (0)

#define MPZ_PTR_SWAP(x, y) \
  do { \
    mpz_ptr __mpz_ptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_ptr_swap__tmp; \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y) \
  do { \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
    (x) = (y); \
    (y) = __mpz_srcptr_swap__tmp; \
  } while (0)
/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that. */
/* Application-replaceable allocation hooks (presumably installed via
   mp_set_memory_functions -- confirm in memory.c).  The reallocate/free
   hooks receive byte sizes, old and new.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
__GMP_DECLSPEC extern void (*__gmp_free_func) (void *, size_t);
/* Default implementations of the hooks above. */
__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
__GMP_DECLSPEC void __gmp_default_free (void *, size_t);
/* Typed wrappers: n/old_size/new_size are element counts; the hooks are
   passed byte counts.  */
#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
((type *) (*__gmp_reallocate_func) \
(p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
__GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
/* Reallocate only when the size actually changed; no-op otherwise.
   (Both macros had lost their do/while wrappers.)  */
#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
  do { \
    if ((oldsize) != (newsize)) \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
  do { \
    if ((oldsize) != (newsize)) \
      (ptr) = (type *) (*__gmp_reallocate_func) \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
  } while (0)
/* Dummy for non-gcc, code involving it will go dead.
   (The closing #endif had been lost.)  */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x) 0
#endif
735 /* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
736 stack usage is compatible. __attribute__ ((regparm (N))) helps by
737 putting leading parameters in registers, avoiding extra stack.
739 regparm cannot be used with calls going through the PLT, because the
740 binding code there may clobber the registers (%eax, %edx, %ecx) used for
741 the regparm parameters. Calls to local (ie. static) functions could
742 still use this, if we cared to differentiate locals and globals.
744 On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
745 -p or -pg profiling, since that version of gcc doesn't realize the
746 .mcount calls will clobber the parameter registers. Other systems are
747 ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
748 bother to try to detect this. regparm is only an optimization so we just
749 disable it when profiling (profiling being a slowdown anyway). */
751 #if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
752 && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
753 #define USE_LEADING_REGPARM 1
755 #define USE_LEADING_REGPARM 0
758 /* Macros for altering parameter order according to regparm usage. */
759 #if USE_LEADING_REGPARM
760 #define REGPARM_2_1(a,b,x) x,a,b
761 #define REGPARM_3_1(a,b,c,x) x,a,b,c
762 #define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
764 #define REGPARM_2_1(a,b,x) a,b,x
765 #define REGPARM_3_1(a,b,c,x) a,b,c,x
766 #define REGPARM_ATTR(n)
770 /* ASM_L gives a local label for a gcc asm block, for use when temporary
771 local labels like "1:" might not be available, which is the case for
772 instance on the x86s (the SCO assembler doesn't support them).
774 The label generated is made unique by including "%=" which is a unique
775 number for each insn. This ensures the same name can be used in multiple
776 asm blocks, perhaps via a macro. Since jumps between asm blocks are not
allowed there's no need for a label to be usable outside a single
asm.  */
#define ASM_L(name) LSYM_PREFIX "asm_%=_" #name  /* LSYM_PREFIX is presumably the assembler's local-label prefix from configuration -- confirm */
783 #if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
785 /* FIXME: Check that these actually improve things.
786 FIXME: Need a cld after each std.
787 FIXME: Can't have inputs in clobbered registers, must describe them as
788 dummy outputs, and add volatile. */
789 #define MPN_COPY_INCR(DST, SRC, N) \
790 __asm__ ("cld\n\trep\n\tmovsl" : : \
791 "D" (DST), "S" (SRC), "c" (N) : \
792 "cx", "di", "si", "memory")
793 #define MPN_COPY_DECR(DST, SRC, N) \
794 __asm__ ("std\n\trep\n\tmovsl" : : \
795 "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
796 "cx", "di", "si", "memory")
/* Internal add-or-sub mul-1 entry; parameters are reordered by REGPARM_3_1
   when leading-regparm is in use.  */
__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
/* Raise an mpn operand {up,n} to an unsigned-long power, result to an mpz. */
__GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);
811 #ifndef mpn_addmul_2 /* if not done with cpuvec in a fat binary */
812 #define mpn_addmul_2 __MPN(addmul_2)
813 __GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
/* mpn_addmul_k: multiply {up,n} by a k-limb multiplier and accumulate into
   the result area, returning the carry limb.  Presumably only provided by
   assembler implementations on some CPUs -- confirm.  */
#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase. */
#define mpn_addmul_2s __MPN(addmul_2s)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
838 /* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
839 returns the carry out (0, 1 or 2). Use _ip1 when a=c. */
840 #ifndef mpn_addlsh1_n /* if not done with cpuvec in a fat binary */
841 #define mpn_addlsh1_n __MPN(addlsh1_n)
842 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
844 #define mpn_addlsh1_nc __MPN(addlsh1_nc)
845 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
846 #if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
847 #define mpn_addlsh1_n_ip1(dst,src,n) mpn_addlsh1_n(dst,dst,src,n)
848 #define HAVE_NATIVE_mpn_addlsh1_n_ip1 1
850 #define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
851 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
853 #if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
854 #define mpn_addlsh1_nc_ip1(dst,src,n,c) mpn_addlsh1_nc(dst,dst,src,n,c)
855 #define HAVE_NATIVE_mpn_addlsh1_nc_ip1 1
857 #define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
858 __GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
861 #ifndef mpn_addlsh2_n /* if not done with cpuvec in a fat binary */
862 /* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
863 returns the carry out (0, ..., 4). Use _ip1 when a=c. */
864 #define mpn_addlsh2_n __MPN(addlsh2_n)
865 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
867 #define mpn_addlsh2_nc __MPN(addlsh2_nc)
868 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
869 #if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
870 #define mpn_addlsh2_n_ip1(dst,src,n) mpn_addlsh2_n(dst,dst,src,n)
871 #define HAVE_NATIVE_mpn_addlsh2_n_ip1 1
873 #define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
874 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
876 #if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
877 #define mpn_addlsh2_nc_ip1(dst,src,n,c) mpn_addlsh2_nc(dst,dst,src,n,c)
878 #define HAVE_NATIVE_mpn_addlsh2_nc_ip1 1
880 #define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
881 __GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
884 /* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
885 returns the carry out (0, ..., 2^k). Use _ip1 when a=c. */
886 #define mpn_addlsh_n __MPN(addlsh_n)
887 __GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
888 #define mpn_addlsh_nc __MPN(addlsh_nc)
889 __GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
890 #if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh_n_ip1
891 #define mpn_addlsh_n_ip1(dst,src,n,s) mpn_addlsh_n(dst,dst,src,n,s)
892 #define HAVE_NATIVE_mpn_addlsh_n_ip1 1
894 #define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
895 __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
897 #if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh_nc_ip1
898 #define mpn_addlsh_nc_ip1(dst,src,n,s,c) mpn_addlsh_nc(dst,dst,src,n,s,c)
899 #define HAVE_NATIVE_mpn_addlsh_nc_ip1 1
901 #define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
902 __GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
905 #ifndef mpn_sublsh1_n /* if not done with cpuvec in a fat binary */
906 /* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
907 returns the borrow out (0, 1 or 2). Use _ip1 when a=c. */
908 #define mpn_sublsh1_n __MPN(sublsh1_n)
909 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
911 #define mpn_sublsh1_nc __MPN(sublsh1_nc)
912 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
913 #if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
914 #define mpn_sublsh1_n_ip1(dst,src,n) mpn_sublsh1_n(dst,dst,src,n)
915 #define HAVE_NATIVE_mpn_sublsh1_n_ip1 1
917 #define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
918 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
920 #if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
921 #define mpn_sublsh1_nc_ip1(dst,src,n,c) mpn_sublsh1_nc(dst,dst,src,n,c)
922 #define HAVE_NATIVE_mpn_sublsh1_nc_ip1 1
924 #define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
925 __GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
   returns the carry out (-1, 0, 1). */
/* Signed return, hence mp_limb_signed_t rather than mp_limb_t.  */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
/* Carry-in variant (final mp_limb_t argument). */
#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
935 /* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
936 returns the borrow out (0, ..., 4). Use _ip1 when a=c. */
937 #define mpn_sublsh2_n __MPN(sublsh2_n)
938 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
939 #define mpn_sublsh2_nc __MPN(sublsh2_nc)
940 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
941 #if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
942 #define mpn_sublsh2_n_ip1(dst,src,n) mpn_sublsh2_n(dst,dst,src,n)
943 #define HAVE_NATIVE_mpn_sublsh2_n_ip1 1
945 #define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
946 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
948 #if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
949 #define mpn_sublsh2_nc_ip1(dst,src,n,c) mpn_sublsh2_nc(dst,dst,src,n,c)
950 #define HAVE_NATIVE_mpn_sublsh2_nc_ip1 1
952 #define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
953 __GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
956 /* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
957 returns the borrow out (0, ..., 2^k). Use _ip1 when a=c. */
958 #define mpn_sublsh_n __MPN(sublsh_n)
959 __GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
960 #if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh_n_ip1
961 #define mpn_sublsh_n_ip1(dst,src,n,s) mpn_sublsh_n(dst,dst,src,n,s)
962 #define HAVE_NATIVE_mpn_sublsh_n_ip1 1
964 #define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
965 __GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
967 #if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh_nc_ip1
968 #define mpn_sublsh_nc_ip1(dst,src,n,s,c) mpn_sublsh_nc(dst,dst,src,n,s,c)
969 #define HAVE_NATIVE_mpn_sublsh_nc_ip1 1
971 #define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
972 __GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
975 /* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
976 returns the carry out (-1, ..., 3). */
977 #define mpn_rsblsh2_n __MPN(rsblsh2_n)
978 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
979 #define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
980 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
982 /* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
983 returns the carry out (-1, 0, ..., 2^k-1). */
984 #define mpn_rsblsh_n __MPN(rsblsh_n)
985 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
986 #define mpn_rsblsh_nc __MPN(rsblsh_nc)
987 __GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
989 /* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
990 and returns the bit rshifted out (0 or 1). */
991 #define mpn_rsh1add_n __MPN(rsh1add_n)
992 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
993 #define mpn_rsh1add_nc __MPN(rsh1add_nc)
994 __GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
996 /* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
997 and returns the bit rshifted out (0 or 1). If there's a borrow from the
998 subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
999 complement negative. */
1000 #define mpn_rsh1sub_n __MPN(rsh1sub_n)
1001 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1002 #define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
1003 __GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1005 #ifndef mpn_lshiftc /* if not done with cpuvec in a fat binary */
1006 #define mpn_lshiftc __MPN(lshiftc)
1007 __GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
1010 #define mpn_add_err1_n __MPN(add_err1_n)
1011 __GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1013 #define mpn_add_err2_n __MPN(add_err2_n)
1014 __GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1016 #define mpn_add_err3_n __MPN(add_err3_n)
1017 __GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1019 #define mpn_sub_err1_n __MPN(sub_err1_n)
1020 __GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1022 #define mpn_sub_err2_n __MPN(sub_err2_n)
1023 __GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1025 #define mpn_sub_err3_n __MPN(sub_err3_n)
1026 __GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1028 #define mpn_add_n_sub_n __MPN(add_n_sub_n)
1029 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1031 #define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
1032 __GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1034 #define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
1035 __GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1037 #define mpn_divrem_1c __MPN(divrem_1c)
1038 __GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1040 #define mpn_dump __MPN(dump)
1041 __GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);
1043 #define mpn_fib2_ui __MPN(fib2_ui)
1044 __GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
1046 /* Remap names of internal mpn functions. */
1047 #define __clz_tab __MPN(clz_tab)
1048 #define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
1050 #define mpn_jacobi_base __MPN(jacobi_base)
1051 __GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;
1053 #define mpn_jacobi_2 __MPN(jacobi_2)
1054 __GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);
1056 #define mpn_jacobi_n __MPN(jacobi_n)
1057 __GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);
1059 #define mpn_mod_1c __MPN(mod_1c)
1060 __GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
1062 #define mpn_mul_1c __MPN(mul_1c)
1063 __GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1065 #define mpn_mul_2 __MPN(mul_2)
1066 __GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1068 #define mpn_mul_3 __MPN(mul_3)
1069 __GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1071 #define mpn_mul_4 __MPN(mul_4)
1072 __GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1074 #define mpn_mul_5 __MPN(mul_5)
1075 __GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1077 #define mpn_mul_6 __MPN(mul_6)
1078 __GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1080 #ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
1081 #define mpn_mul_basecase __MPN(mul_basecase)
1082 __GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1085 #define mpn_mullo_n __MPN(mullo_n)
1086 __GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1088 #ifndef mpn_mullo_basecase /* if not done with cpuvec in a fat binary */
1089 #define mpn_mullo_basecase __MPN(mullo_basecase)
1090 __GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1093 #define mpn_sqr __MPN(sqr)
1094 __GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
1096 #ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */
1097 #define mpn_sqr_basecase __MPN(sqr_basecase)
1098 __GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
1101 #define mpn_mulmid_basecase __MPN(mulmid_basecase)
1102 __GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1104 #define mpn_mulmid_n __MPN(mulmid_n)
1105 __GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1107 #define mpn_mulmid __MPN(mulmid)
1108 __GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1110 #define mpn_submul_1c __MPN(submul_1c)
1111 __GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1113 #ifndef mpn_redc_1 /* if not done with cpuvec in a fat binary */
1114 #define mpn_redc_1 __MPN(redc_1)
1115 __GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1118 #ifndef mpn_redc_2 /* if not done with cpuvec in a fat binary */
1119 #define mpn_redc_2 __MPN(redc_2)
1120 __GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1123 #define mpn_redc_n __MPN(redc_n)
1124 __GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
1127 #ifndef mpn_mod_1_1p_cps /* if not done with cpuvec in a fat binary */
1128 #define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
1129 __GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
1131 #ifndef mpn_mod_1_1p /* if not done with cpuvec in a fat binary */
1132 #define mpn_mod_1_1p __MPN(mod_1_1p)
1133 __GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
1136 #ifndef mpn_mod_1s_2p_cps /* if not done with cpuvec in a fat binary */
1137 #define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
1138 __GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
1140 #ifndef mpn_mod_1s_2p /* if not done with cpuvec in a fat binary */
1141 #define mpn_mod_1s_2p __MPN(mod_1s_2p)
1142 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
1145 #ifndef mpn_mod_1s_3p_cps /* if not done with cpuvec in a fat binary */
1146 #define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
1147 __GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
1149 #ifndef mpn_mod_1s_3p /* if not done with cpuvec in a fat binary */
1150 #define mpn_mod_1s_3p __MPN(mod_1s_3p)
1151 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
1154 #ifndef mpn_mod_1s_4p_cps /* if not done with cpuvec in a fat binary */
1155 #define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
1156 __GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
1158 #ifndef mpn_mod_1s_4p /* if not done with cpuvec in a fat binary */
1159 #define mpn_mod_1s_4p __MPN(mod_1s_4p)
1160 __GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
1163 #define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
1164 __GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
1165 #define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
1166 __GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1167 #define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
1168 __GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
/* mpn_mulmod_bnm1_itch: number of scratch limbs needed by mpn_mulmod_bnm1
   for a result of rn limbs and operands of an and bn limbs.
   NOTE(review): this excerpt is truncated -- the declaration of the local
   `n`, the accumulation into the returned value, the return statement and
   the closing brace (original lines 1171-1173, 1175-1177) are not visible
   here.  Verify against the complete file before editing this function.  */
1169 static inline mp_size_t
1170 mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
1174 (an > n ? (bn > n ? rn : n) : 0);
1178 #define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
1179 __GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1180 #define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
1181 __GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
/* mpn_sqrmod_bnm1_itch: number of scratch limbs needed by mpn_sqrmod_bnm1
   for a result of rn limbs and an operand of an limbs.
   NOTE(review): the entire function body (original lines 1184-1190) is
   elided from this excerpt; only the signature is visible.  Verify against
   the complete file before editing this function.  */
1182 static inline mp_size_t
1183 mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
1191 typedef __gmp_randstate_struct *gmp_randstate_ptr;
1192 typedef const __gmp_randstate_struct *gmp_randstate_srcptr;
1194 /* Pseudo-random number generator function pointers structure. */
1196 void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
1197 void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
1198 void (*randclear_fn) (gmp_randstate_t);
1199 void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
1202 /* Macro to obtain a void pointer to the function pointers structure. */
1203 #define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)
1205 /* Macro to obtain a pointer to the generator's state.
1206 When used as an lvalue the rvalue needs to be cast to mp_ptr. */
1207 #define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
1209 /* Write a given number of random bits to rp. */
/* Dispatches through the generator's randget_fn function pointer stored in
   the state's algorithm-data slot (see RNG_FNPTR above).
   NOTE(review): the do { ... } while (0) wrapper lines (original lines
   1211, 1215-1216) appear to be elided from this excerpt, which is why the
   trailing backslash continuations look incomplete.  Verify against the
   complete file. */
1210 #define _gmp_rand(rp, state, bits) \
1212 gmp_randstate_ptr __rstate = (state); \
1213 (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
1214 (__rstate, rp, bits); \
1217 __GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);
1220 /* __gmp_rands is the global state for the old-style random functions, and
1221 is also used in the test programs (hence the __GMP_DECLSPEC).
1223 There's no seeding here, so mpz_random etc will generate the same
1224 sequence every time. This is not unlike the C library random functions
1225 if you don't seed them, so perhaps it's acceptable. Digging up a seed
1226 from /dev/random or the like would work on many systems, but might
1227 encourage a false confidence, since it'd be pretty much impossible to do
1228 something that would work reliably everywhere. In any case the new style
1229 functions are recommended to applications which care about randomness, so
1230 the old functions aren't too important. */
1232 __GMP_DECLSPEC extern char __gmp_rands_initialized;
1233 __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
1236 ((__gmp_rands_initialized ? 0 \
1237 : (__gmp_rands_initialized = 1, \
1238 __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
1241 /* this is used by the test programs, to free memory */
1242 #define RANDS_CLEAR() \
1244 if (__gmp_rands_initialized) \
1246 __gmp_rands_initialized = 0; \
1247 gmp_randclear (__gmp_rands); \
1252 /* For a threshold between algorithms A and B, size>=thresh is where B
1253 should be used. Special value MP_SIZE_T_MAX means only ever use A, or
1254 value 0 means only ever use B. The tests for these special values will
1255 be compile-time constants, so the compiler should be able to eliminate
1256 the code for the unwanted algorithm. */
1258 #if ! defined (__GNUC__) || __GNUC__ < 2
1259 #define ABOVE_THRESHOLD(size,thresh) \
1261 || ((thresh) != MP_SIZE_T_MAX \
1262 && (size) >= (thresh)))
1264 #define ABOVE_THRESHOLD(size,thresh) \
1265 ((__builtin_constant_p (thresh) && (thresh) == 0) \
1266 || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX) \
1267 && (size) >= (thresh)))
1269 #define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
1271 #define MPN_TOOM22_MUL_MINSIZE 4
1272 #define MPN_TOOM2_SQR_MINSIZE 4
1274 #define MPN_TOOM33_MUL_MINSIZE 17
1275 #define MPN_TOOM3_SQR_MINSIZE 17
1277 #define MPN_TOOM44_MUL_MINSIZE 30
1278 #define MPN_TOOM4_SQR_MINSIZE 30
1280 #define MPN_TOOM6H_MUL_MINSIZE 46
1281 #define MPN_TOOM6_SQR_MINSIZE 46
1283 #define MPN_TOOM8H_MUL_MINSIZE 86
1284 #define MPN_TOOM8_SQR_MINSIZE 86
1286 #define MPN_TOOM32_MUL_MINSIZE 10
1287 #define MPN_TOOM42_MUL_MINSIZE 10
1288 #define MPN_TOOM43_MUL_MINSIZE 25
1289 #define MPN_TOOM53_MUL_MINSIZE 17
1290 #define MPN_TOOM54_MUL_MINSIZE 31
1291 #define MPN_TOOM63_MUL_MINSIZE 49
1293 #define MPN_TOOM42_MULMID_MINSIZE 4
1295 #define mpn_sqr_diagonal __MPN(sqr_diagonal)
1296 __GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);
1298 #define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
1299 __GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
1301 #define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
1302 __GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);
1304 enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
1305 #define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
1306 __GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);
1308 enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
1309 #define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
1310 __GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
1312 #define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
1313 __GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
1315 #define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
1316 __GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
1318 #define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
1319 __GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
1321 #define mpn_toom_couple_handling __MPN(toom_couple_handling)
1322 __GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);
1324 #define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
1325 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1327 #define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
1328 __GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1330 #define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
1331 __GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1333 #define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
1334 __GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1336 #define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
1337 __GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
1339 #define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
1340 __GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
1342 #define mpn_toom22_mul __MPN(toom22_mul)
1343 __GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1345 #define mpn_toom32_mul __MPN(toom32_mul)
1346 __GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1348 #define mpn_toom42_mul __MPN(toom42_mul)
1349 __GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1351 #define mpn_toom52_mul __MPN(toom52_mul)
1352 __GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1354 #define mpn_toom62_mul __MPN(toom62_mul)
1355 __GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1357 #define mpn_toom2_sqr __MPN(toom2_sqr)
1358 __GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1360 #define mpn_toom33_mul __MPN(toom33_mul)
1361 __GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1363 #define mpn_toom43_mul __MPN(toom43_mul)
1364 __GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1366 #define mpn_toom53_mul __MPN(toom53_mul)
1367 __GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1369 #define mpn_toom54_mul __MPN(toom54_mul)
1370 __GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1372 #define mpn_toom63_mul __MPN(toom63_mul)
1373 __GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1375 #define mpn_toom3_sqr __MPN(toom3_sqr)
1376 __GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1378 #define mpn_toom44_mul __MPN(toom44_mul)
1379 __GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1381 #define mpn_toom4_sqr __MPN(toom4_sqr)
1382 __GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1384 #define mpn_toom6h_mul __MPN(toom6h_mul)
1385 __GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1387 #define mpn_toom6_sqr __MPN(toom6_sqr)
1388 __GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1390 #define mpn_toom8h_mul __MPN(toom8h_mul)
1391 __GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1393 #define mpn_toom8_sqr __MPN(toom8_sqr)
1394 __GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1396 #define mpn_toom42_mulmid __MPN(toom42_mulmid)
1397 __GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
1399 #define mpn_fft_best_k __MPN(fft_best_k)
1400 __GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
1402 #define mpn_mul_fft __MPN(mul_fft)
1403 __GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
1405 #define mpn_mul_fft_full __MPN(mul_fft_full)
1406 __GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1408 #define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
1409 __GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1411 #define mpn_fft_next_size __MPN(fft_next_size)
1412 __GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
1414 #define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
1415 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
1417 #define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
1418 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
1420 #define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
1421 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1423 #define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
1424 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1426 #define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
1427 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1429 #define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
1430 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1431 #define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
1432 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
1434 #define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
1435 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1437 #define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
1438 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1439 #define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
1440 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
1442 #define mpn_mu_div_qr __MPN(mu_div_qr)
1443 __GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1444 #define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1445 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int);
1446 #define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
1447 __GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);
1449 #define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1450 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1451 #define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
1452 __GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t);
1454 #define mpn_mu_divappr_q __MPN(mu_divappr_q)
1455 __GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1456 #define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1457 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int);
1458 #define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
1459 __GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
1461 #define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
1462 __GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1464 #define mpn_mu_div_q __MPN(mu_div_q)
1465 __GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1466 #define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
1467 __GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int);
1469 #define mpn_div_q __MPN(div_q)
1470 __GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1472 #define mpn_invert __MPN(invert)
1473 __GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1474 #define mpn_invert_itch(n) mpn_invertappr_itch(n)
1476 #define mpn_ni_invertappr __MPN(ni_invertappr)
1477 __GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1478 #define mpn_invertappr __MPN(invertappr)
1479 __GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1480 #define mpn_invertappr_itch(n) (3 * (n) + 2)
1482 #define mpn_binvert __MPN(binvert)
1483 __GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1484 #define mpn_binvert_itch __MPN(binvert_itch)
1485 __GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t);
1487 #define mpn_bdiv_q_1 __MPN(bdiv_q_1)
1488 __GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1490 #define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
1491 __GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
1493 #define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
1494 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1496 #define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
1497 __GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1499 #define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
1500 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1501 #define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
1502 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t);
1504 #define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
1505 __GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1506 #define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
1507 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1509 #define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
1510 __GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch (mp_size_t);
1511 #define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
1512 __GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1514 #define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1515 __GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1516 #define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1517 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t);
1519 #define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1520 __GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1521 #define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1522 __GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t);
1524 #define mpn_bdiv_qr __MPN(bdiv_qr)
1525 __GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1526 #define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
1527 __GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t);
1529 #define mpn_bdiv_q __MPN(bdiv_q)
1530 __GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1531 #define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
1532 __GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t);
1534 #define mpn_divexact __MPN(divexact)
1535 __GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1536 #define mpn_divexact_itch __MPN(divexact_itch)
1537 __GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t);
1539 #ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */
1540 #define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1541 __GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1544 #define mpn_bdiv_dbm1(dst, src, size, divisor) \
1545 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1547 #define mpn_powm __MPN(powm)
1548 __GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1549 #define mpn_powlo __MPN(powlo)
1550 __GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1551 #define mpn_powm_sec __MPN(powm_sec)
1552 __GMP_DECLSPEC void mpn_powm_sec (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1553 #define mpn_powm_sec_itch __MPN(powm_sec_itch)
1554 __GMP_DECLSPEC mp_size_t mpn_powm_sec_itch (mp_size_t, mp_size_t, mp_size_t);
1555 #define mpn_tabselect __MPN(tabselect)
1556 __GMP_DECLSPEC void mpn_tabselect (volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t);
1557 #define mpn_addcnd_n __MPN(addcnd_n)
1558 __GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1559 #define mpn_subcnd_n __MPN(subcnd_n)
1560 __GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
1562 #define mpn_sb_div_qr_sec __MPN(sb_div_qr_sec)
1563 __GMP_DECLSPEC void mpn_sb_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1564 #define mpn_sbpi1_div_qr_sec __MPN(sbpi1_div_qr_sec)
1565 __GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1566 #define mpn_sb_div_r_sec __MPN(sb_div_r_sec)
1567 __GMP_DECLSPEC void mpn_sb_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1568 #define mpn_sbpi1_div_r_sec __MPN(sbpi1_div_r_sec)
1569 __GMP_DECLSPEC void mpn_sbpi1_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
/* Select how mpn_divexact_by3 is implemented: method 0 routes it through
   mpn_bdiv_dbm1c (usable when the limb size is even and there is no native
   divexact_by3c), method 1 keeps the gmp.h definition.
   NOTE(review): the #else/#endif lines of these conditionals (original
   lines 1575, 1577-1579, 1585, 1589-1591) are elided from this excerpt. */
1572 #ifndef DIVEXACT_BY3_METHOD
1573 #if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1574 #define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */
1576 #define DIVEXACT_BY3_METHOD 1
1580 #if DIVEXACT_BY3_METHOD == 0
1581 #undef mpn_divexact_by3
1582 #define mpn_divexact_by3(dst,src,size) \
1583 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1584 /* override mpn_divexact_by3c defined in gmp.h */
/* NOTE(review): on the next definition __GMP_CAST appears to be invoked
   with three arguments, whereas every other use in this file takes two
   (type, expression).  Presumably a closing parenthesis after
   `GMP_NUMB_MASK / 3` and a second __GMP_CAST (or surrounding comment
   delimiters) were lost in this excerpt -- confirm against the upstream
   gmp-impl.h before relying on mpn_divexact_by3c here. */
1586 #undef mpn_divexact_by3c
1587 #define mpn_divexact_by3c(dst,src,size,cy) \
1588 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy)))
1592 #if GMP_NUMB_BITS % 4 == 0
1593 #define mpn_divexact_by5(dst,src,size) \
1594 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1597 #if GMP_NUMB_BITS % 3 == 0
1598 #define mpn_divexact_by7(dst,src,size) \
1599 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1602 #if GMP_NUMB_BITS % 6 == 0
1603 #define mpn_divexact_by9(dst,src,size) \
1604 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1607 #if GMP_NUMB_BITS % 10 == 0
1608 #define mpn_divexact_by11(dst,src,size) \
1609 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1612 #if GMP_NUMB_BITS % 12 == 0
1613 #define mpn_divexact_by13(dst,src,size) \
1614 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1617 #if GMP_NUMB_BITS % 4 == 0
1618 #define mpn_divexact_by15(dst,src,size) \
1619 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
/* mpz_divexact_gcd: exact division -- NOTE(review): the _gcd suffix
   suggests the divisor is known to divide exactly (e.g. a gcd of the
   operands); confirm the precondition in mpz/divegcd.c.  */
#define mpz_divexact_gcd __gmpz_divexact_gcd
__GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);

/* mpz_prodlimbs: presumably forms a product from a limb list into an mpz,
   returning a size -- confirm against mpz/prodlimbs.c.  */
#define mpz_prodlimbs __gmpz_prodlimbs
__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);

/* mpz_oddfac_1: by name, the odd part of a factorial of a limb-sized
   operand -- see mpz/oddfac_1.c for the exact contract.  */
#define mpz_oddfac_1 __gmpz_oddfac_1
__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);

/* Like mpz_inp_str but, per the name, without skipping whitespace.  Only
   declared when gmp.h saw a FILE type.  */
#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
__GMP_DECLSPEC size_t mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);

/* Divisibility test on mpn operands; declared pure (no side effects,
   result depends only on the arguments).  */
#define mpn_divisible_p __MPN(divisible_p)
__GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;

/* k-th root with remainder on an mpn operand; returns a size.  */
#define mpn_rootrem __MPN(rootrem)
__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

/* "b"-prefixed routines: by analogy with bdiv/binv elsewhere in this file,
   presumably binary (2-adic, mod 2^k) root/inverse computations.
   NOTE(review): exact contracts in mpn/generic/{broot,brootinv,bsqrt,
   bsqrtinv}.c -- confirm there.  */
#define mpn_broot __MPN(broot)
__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_broot_invm1 __MPN(broot_invm1)
__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_brootinv __MPN(brootinv)
__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);

#define mpn_bsqrt __MPN(bsqrt)
__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);

#define mpn_bsqrtinv __MPN(bsqrtinv)
__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1658 #define MPN_COPY_INCR(dst, src, n) \
1660 int __i; /* Faster on some Crays with plain int */ \
1661 _Pragma ("_CRI ivdep"); \
1662 for (__i = 0; __i < (n); __i++) \
1663 (dst)[__i] = (src)[__i]; \
1667 /* used by test programs, hence __GMP_DECLSPEC */
1668 #ifndef mpn_copyi /* if not done with cpuvec in a fat binary */
1669 #define mpn_copyi __MPN(copyi)
1670 __GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
1673 #if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1674 #define MPN_COPY_INCR(dst, src, size) \
1676 ASSERT ((size) >= 0); \
1677 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
1678 mpn_copyi (dst, src, size); \
1682 /* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
1683 #if ! defined (MPN_COPY_INCR)
1684 #define MPN_COPY_INCR(dst, src, n) \
1686 ASSERT ((n) >= 0); \
1687 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
1690 mp_size_t __n = (n) - 1; \
1691 mp_ptr __dst = (dst); \
1692 mp_srcptr __src = (src); \
1711 #define MPN_COPY_DECR(dst, src, n) \
1713 int __i; /* Faster on some Crays with plain int */ \
1714 _Pragma ("_CRI ivdep"); \
1715 for (__i = (n) - 1; __i >= 0; __i--) \
1716 (dst)[__i] = (src)[__i]; \
1720 /* used by test programs, hence __GMP_DECLSPEC */
1721 #ifndef mpn_copyd /* if not done with cpuvec in a fat binary */
1722 #define mpn_copyd __MPN(copyd)
1723 __GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
1726 #if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1727 #define MPN_COPY_DECR(dst, src, size) \
1729 ASSERT ((size) >= 0); \
1730 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
1731 mpn_copyd (dst, src, size); \
1735 /* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
1736 #if ! defined (MPN_COPY_DECR)
1737 #define MPN_COPY_DECR(dst, src, n) \
1739 ASSERT ((n) >= 0); \
1740 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
1743 mp_size_t __n = (n) - 1; \
1744 mp_ptr __dst = (dst) + __n; \
1745 mp_srcptr __src = (src) + __n; \
1764 #define MPN_COPY(d,s,n) \
1766 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
1767 MPN_COPY_INCR (d, s, n); \
1772 /* Set {dst,size} to the limbs of {src,size} in reverse order. */
1773 #define MPN_REVERSE(dst, src, size) \
1775 mp_ptr __dst = (dst); \
1776 mp_size_t __size = (size); \
1777 mp_srcptr __src = (src) + __size - 1; \
1779 ASSERT ((size) >= 0); \
1780 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
1781 CRAY_Pragma ("_CRI ivdep"); \
1782 for (__i = 0; __i < __size; __i++) \
1791 /* Zero n limbs at dst.
1793 For power and powerpc we want an inline stu/bdnz loop for zeroing. On
1794 ppc630 for instance this is optimal since it can sustain only 1 store per
1797 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the
1798 "for" loop in the generic code below can become stu/bdnz. The do/while
1799 here helps it get to that. The same caveat about plain -mpowerpc64 mode
1800 applies here as to __GMPN_COPY_INCR in gmp.h.
1802 xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1805 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1806 at a time. MPN_ZERO isn't all that important in GMP, so it might be more
1807 trouble than it's worth to do the same, though perhaps a call to memset
1808 would be good when on a GNU system. */
1810 #if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1811 #define MPN_ZERO(dst, n) \
1813 ASSERT ((n) >= 0); \
1816 mp_ptr __dst = (dst) - 1; \
1817 mp_size_t __n = (n); \
1826 #define MPN_ZERO(dst, n) \
1828 ASSERT ((n) >= 0); \
1831 mp_ptr __dst = (dst); \
1832 mp_size_t __n = (n); \
1841 /* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1842 start up and would need to strip a lot of zeros before it'd be faster
1843 than a simple cmpl loop. Here are some times in cycles for
1844 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1853 #ifndef MPN_NORMALIZE
1854 #define MPN_NORMALIZE(DST, NLIMBS) \
1856 while ((NLIMBS) > 0) \
1858 if ((DST)[(NLIMBS) - 1] != 0) \
1864 #ifndef MPN_NORMALIZE_NOT_ZERO
1865 #define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
1869 ASSERT ((NLIMBS) >= 1); \
1870 if ((DST)[(NLIMBS) - 1] != 0) \
1877 /* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1878 and decrementing size. low should be ptr[0], and will be the new ptr[0]
1879 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
1880 somewhere a non-zero limb. */
1881 #define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
1883 ASSERT ((size) >= 1); \
1884 ASSERT ((low) == (ptr)[0]); \
1886 while ((low) == 0) \
1889 ASSERT ((size) >= 1); \
1895 /* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
1896 temporary variable; it will be automatically cleared out at function
1897 return. We use __x here to make it possible to accept both mpz_ptr and
1899 #define MPZ_TMP_INIT(X, NLIMBS) \
1901 mpz_ptr __x = (X); \
1902 ASSERT ((NLIMBS) >= 1); \
1903 __x->_mp_alloc = (NLIMBS); \
1904 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
1908 static inline void *
1909 _mpz_newalloc (mpz_ptr z, mp_size_t n)
1911 void * res = _mpz_realloc(z,n);
1912 /* If we are checking the code, force a random change to limbs. */
1913 ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
1917 #define _mpz_newalloc _mpz_realloc
1919 /* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
1920 #define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
1921 ? (mp_ptr) _mpz_realloc(z,n) \
1923 #define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
1924 ? (mp_ptr) _mpz_newalloc(z,n) \
1927 #define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
1930 /* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
1933 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
1934 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the
1935 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
1937 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
1938 without good floating point. There's +2 for rounding up, and a further
1939 +2 since at the last step x limbs are doubled into a 2x+1 limb region
1940 whereas the actual F[2k] value might be only 2x-1 limbs.
1942 Note that a division is done first, since on a 32-bit system it's at
1943 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be
1944 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
1945 whatever a multiply of two 190Mbyte numbers takes.)
1947 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
1948 worked into the multiplier. */
1950 #define MPN_FIB2_SIZE(n) \
1951 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
1954 /* FIB_TABLE(n) returns the Fibonacci number F[n]. Must have n in the range
1955 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
1957 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
1958 F[n] + 2*F[n-1] fits in a limb. */
/* Fibonacci table, stored offset by one so that the documented n == -1
   case indexes element 0.  */
__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n) (__gmp_fib_table[(n)+1])

/* Factorial-related tables -- by name, presumably the odd parts of
   factorials, odd double-factorials, and counts of factors of 2 (confirm
   in the fac_table sources) -- plus the table of limb-sized roots used by
   log_n_max below.  */
extern const mp_limb_t __gmp_oddfac_table[];
extern const mp_limb_t __gmp_odd2fac_table[];
extern const unsigned char __gmp_fac2cnt_table[];
extern const mp_limb_t __gmp_limbroots_table[];
1968 /* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */
1969 static inline unsigned
1970 log_n_max (mp_limb_t n)
1973 for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);
1977 #define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
1980 unsigned long d; /* current index in s[] */
1981 unsigned long s0; /* number corresponding to s[0] */
1982 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */
1983 unsigned char s[SIEVESIZE + 1]; /* sieve table */
/* Initialize a gmp_primesieve_t (fields above: index d, base s0, sqrt
   bound, byte table s[]) for prime enumeration.  */
#define gmp_init_primesieve __gmp_init_primesieve
__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);

/* Presumably yields successive primes from the sieve state -- confirm in
   primesieve.c.  */
#define gmp_nextprime __gmp_nextprime
__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);

/* Bit-sieve over an mpn buffer; NOTE(review): argument roles per
   primesieve.c -- confirm.  */
#define gmp_primesieve __gmp_primesieve
__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
1996 #ifndef MUL_TOOM22_THRESHOLD
1997 #define MUL_TOOM22_THRESHOLD 30
2000 #ifndef MUL_TOOM33_THRESHOLD
2001 #define MUL_TOOM33_THRESHOLD 100
2004 #ifndef MUL_TOOM44_THRESHOLD
2005 #define MUL_TOOM44_THRESHOLD 300
2008 #ifndef MUL_TOOM6H_THRESHOLD
2009 #define MUL_TOOM6H_THRESHOLD 350
2012 #ifndef SQR_TOOM6_THRESHOLD
2013 #define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
2016 #ifndef MUL_TOOM8H_THRESHOLD
2017 #define MUL_TOOM8H_THRESHOLD 450
2020 #ifndef SQR_TOOM8_THRESHOLD
2021 #define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
2024 #ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
2025 #define MUL_TOOM32_TO_TOOM43_THRESHOLD 100
2028 #ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
2029 #define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
2032 #ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
2033 #define MUL_TOOM42_TO_TOOM53_THRESHOLD 100
2036 #ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
2037 #define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
2040 #ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
2041 #define MUL_TOOM43_TO_TOOM54_THRESHOLD 150
2044 /* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
2045 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
2046 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
2047 separate hard limit will have been defined. Similarly for TOOM3. */
2048 #ifndef MUL_TOOM22_THRESHOLD_LIMIT
2049 #define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD
2051 #ifndef MUL_TOOM33_THRESHOLD_LIMIT
2052 #define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD
2054 #ifndef MULLO_BASECASE_THRESHOLD_LIMIT
2055 #define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD
2058 /* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
2059 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we
2060 certainly always want it if there's a native assembler mpn_sqr_basecase.)
2062 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
2063 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
2064 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less
2065 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
2066 should be used, and that may be never. */
2068 #ifndef SQR_BASECASE_THRESHOLD
2069 #define SQR_BASECASE_THRESHOLD 0
2072 #ifndef SQR_TOOM2_THRESHOLD
2073 #define SQR_TOOM2_THRESHOLD 50
2076 #ifndef SQR_TOOM3_THRESHOLD
2077 #define SQR_TOOM3_THRESHOLD 120
2080 #ifndef SQR_TOOM4_THRESHOLD
2081 #define SQR_TOOM4_THRESHOLD 400
2084 /* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */
2085 #ifndef SQR_TOOM3_THRESHOLD_LIMIT
2086 #define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
2089 #ifndef MULMID_TOOM42_THRESHOLD
2090 #define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD
2093 #ifndef DC_DIV_QR_THRESHOLD
2094 #define DC_DIV_QR_THRESHOLD 50
2097 #ifndef DC_DIVAPPR_Q_THRESHOLD
2098 #define DC_DIVAPPR_Q_THRESHOLD 200
2101 #ifndef DC_BDIV_QR_THRESHOLD
2102 #define DC_BDIV_QR_THRESHOLD 50
2105 #ifndef DC_BDIV_Q_THRESHOLD
2106 #define DC_BDIV_Q_THRESHOLD 180
2109 #ifndef DIVEXACT_JEB_THRESHOLD
2110 #define DIVEXACT_JEB_THRESHOLD 25
2113 #ifndef INV_MULMOD_BNM1_THRESHOLD
2114 #define INV_MULMOD_BNM1_THRESHOLD (5*MULMOD_BNM1_THRESHOLD)
2117 #ifndef INV_APPR_THRESHOLD
2118 #define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD
2121 #ifndef INV_NEWTON_THRESHOLD
2122 #define INV_NEWTON_THRESHOLD 200
2125 #ifndef BINV_NEWTON_THRESHOLD
2126 #define BINV_NEWTON_THRESHOLD 300
2129 #ifndef MU_DIVAPPR_Q_THRESHOLD
2130 #define MU_DIVAPPR_Q_THRESHOLD 2000
2133 #ifndef MU_DIV_QR_THRESHOLD
2134 #define MU_DIV_QR_THRESHOLD 2000
2137 #ifndef MUPI_DIV_QR_THRESHOLD
2138 #define MUPI_DIV_QR_THRESHOLD 200
2141 #ifndef MU_BDIV_Q_THRESHOLD
2142 #define MU_BDIV_Q_THRESHOLD 2000
2145 #ifndef MU_BDIV_QR_THRESHOLD
2146 #define MU_BDIV_QR_THRESHOLD 2000
2149 #ifndef MULMOD_BNM1_THRESHOLD
2150 #define MULMOD_BNM1_THRESHOLD 16
2153 #ifndef SQRMOD_BNM1_THRESHOLD
2154 #define SQRMOD_BNM1_THRESHOLD 16
2157 #ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
2158 #define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2)
2161 #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
2163 #ifndef REDC_1_TO_REDC_2_THRESHOLD
2164 #define REDC_1_TO_REDC_2_THRESHOLD 15
2166 #ifndef REDC_2_TO_REDC_N_THRESHOLD
2167 #define REDC_2_TO_REDC_N_THRESHOLD 100
2172 #ifndef REDC_1_TO_REDC_N_THRESHOLD
2173 #define REDC_1_TO_REDC_N_THRESHOLD 100
2176 #endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
2179 /* First k to use for an FFT modF multiply. A modF FFT is an order
2180 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
2181 whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
2182 #define FFT_FIRST_K 4
2184 /* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
2185 #ifndef MUL_FFT_MODF_THRESHOLD
2186 #define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3)
2188 #ifndef SQR_FFT_MODF_THRESHOLD
2189 #define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
2192 /* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
2193 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
2194 NxN->2N multiply and not recursing into itself is an order
2195 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
2196 is the first better than toom3. */
2197 #ifndef MUL_FFT_THRESHOLD
2198 #define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
2200 #ifndef SQR_FFT_THRESHOLD
2201 #define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
2204 /* Table of thresholds for successive modF FFT "k"s. The first entry is
2205 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
2206 etc. See mpn_fft_best_k(). */
2207 #ifndef MUL_FFT_TABLE
2208 #define MUL_FFT_TABLE \
2209 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
2210 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
2211 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
2212 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
2213 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
2214 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
2217 #ifndef SQR_FFT_TABLE
2218 #define SQR_FFT_TABLE \
2219 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
2220 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
2221 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
2222 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
2223 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
2224 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
2234 #ifndef FFT_TABLE_ATTRS
2235 #define FFT_TABLE_ATTRS static const
2238 #define MPN_FFT_TABLE_SIZE 16
2241 #ifndef DC_DIV_QR_THRESHOLD
2242 #define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD)
2245 #ifndef GET_STR_DC_THRESHOLD
2246 #define GET_STR_DC_THRESHOLD 18
2249 #ifndef GET_STR_PRECOMPUTE_THRESHOLD
2250 #define GET_STR_PRECOMPUTE_THRESHOLD 35
2253 #ifndef SET_STR_DC_THRESHOLD
2254 #define SET_STR_DC_THRESHOLD 750
2257 #ifndef SET_STR_PRECOMPUTE_THRESHOLD
2258 #define SET_STR_PRECOMPUTE_THRESHOLD 2000
2261 #ifndef FAC_ODD_THRESHOLD
2262 #define FAC_ODD_THRESHOLD 35
2265 #ifndef FAC_DSC_THRESHOLD
2266 #define FAC_DSC_THRESHOLD 400
2269 /* Return non-zero if xp,xsize and yp,ysize overlap.
2270 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
2271 overlap. If both these are false, there's an overlap. */
/* Non-zero when the limb regions {xp,xsize} and {yp,ysize} overlap.  The
   regions are disjoint exactly when one ends at or before the start of
   the other; overlap is the negation of that.  */
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
  (! ((xp) + (xsize) <= (yp) || (yp) + (ysize) <= (xp)))
/* Byte-granularity variant of MPN_OVERLAP_P: sizes are measured in bytes
   rather than limbs, and the pointers may be of any type.  */
#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
  (! ((char *) (xp) + (xsize) <= (char *) (yp) \
      || (char *) (yp) + (ysize) <= (char *) (xp)))
2278 /* Return non-zero if xp,xsize and yp,ysize are either identical or not
2279 overlapping. Return zero if they're partially overlapping. */
/* Non-zero when {xp,xsize} and {yp,ysize} start at the same limb or do
   not overlap at all; zero for a partial overlap.  */
#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
  ((xp) == (yp) ? 1 : ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
  MPN_SAME_OR_SEPARATE2_P (xp, size, yp, size)
2285 /* Return non-zero if dst,dsize and src,ssize are either identical or
2286 overlapping in a way suitable for an incrementing/decrementing algorithm.
2287 Return zero if they're partially overlapping in an unsuitable fashion. */
/* Non-zero when {dst,dsize} and {src,ssize} are identical or overlap in a
   way an incrementing (low-to-high) copy handles correctly: dst at or
   below src, or no overlap at all.  */
#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
  ((dst) <= (src) ? 1 : ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_INCR_P(dst, src, size) \
  MPN_SAME_OR_INCR2_P (dst, size, src, size)

/* Mirror image: suitable for a decrementing (high-to-low) copy, i.e. dst
   at or above src, or no overlap at all.  */
#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
  ((dst) >= (src) ? 1 : ! MPN_OVERLAP_P (dst, dsize, src, ssize))
#define MPN_SAME_OR_DECR_P(dst, src, size) \
  MPN_SAME_OR_DECR2_P (dst, size, src, size)
2298 /* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
2299 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
2300 does it always. Generally assertions are meant for development, but
2301 might help when looking for a problem later too.
2303 Note that strings shouldn't be used within the ASSERT expression,
2304 eg. ASSERT(strcmp(s,"notgood")!=0), since the quotes upset the "expr"
2305 used in the !HAVE_STRINGIZE case (ie. K&R). */
2308 #define ASSERT_LINE __LINE__
2310 #define ASSERT_LINE -1
2314 #define ASSERT_FILE __FILE__
2316 #define ASSERT_FILE ""
2319 __GMP_DECLSPEC void __gmp_assert_header (const char *, int);
2320 __GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
2323 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
2325 #define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
2328 #define ASSERT_ALWAYS(expr) \
2330 if (UNLIKELY (!(expr))) \
2331 ASSERT_FAIL (expr); \
2335 #define ASSERT(expr) ASSERT_ALWAYS (expr)
2337 #define ASSERT(expr) do {} while (0)
2341 /* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2342 that it's zero. In both cases if assertion checking is disabled the
2343 expression is still evaluated. These macros are meant for use with
2344 routines like mpn_add_n() where the return value represents a carry or
2345 whatever that should or shouldn't occur in some context. For example,
2346 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2348 #define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0)
2349 #define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
2351 #define ASSERT_CARRY(expr) (expr)
2352 #define ASSERT_NOCARRY(expr) (expr)
2356 /* ASSERT_CODE includes code when assertion checking is wanted. This is the
2357 same as writing "#if WANT_ASSERT", but more compact. */
2359 #define ASSERT_CODE(expr) expr
2361 #define ASSERT_CODE(expr)
2365 /* Test that an mpq_t is in fully canonical form. This can be used as
2366 protection on routines like mpq_equal which give wrong results on
2367 non-canonical inputs. */
2369 #define ASSERT_MPQ_CANONICAL(q) \
2371 ASSERT (q->_mp_den._mp_size > 0); \
2372 if (q->_mp_num._mp_size == 0) \
2374 /* zero should be 0/1 */ \
2375 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
2379 /* no common factors */ \
2382 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
2383 ASSERT (mpz_cmp_ui (__g, 1) == 0); \
2388 #define ASSERT_MPQ_CANONICAL(q) do {} while (0)
2391 /* Check that the nail parts are zero. */
2392 #define ASSERT_ALWAYS_LIMB(limb) \
2394 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
2395 ASSERT_ALWAYS (__nail == 0); \
2397 #define ASSERT_ALWAYS_MPN(ptr, size) \
2399 /* let whole loop go dead when no nails */ \
2400 if (GMP_NAIL_BITS != 0) \
2403 for (__i = 0; __i < (size); __i++) \
2404 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
2408 #define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
2409 #define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size)
2411 #define ASSERT_LIMB(limb) do {} while (0)
2412 #define ASSERT_MPN(ptr, size) do {} while (0)
2416 /* Assert that an mpn region {ptr,size} is zero, or non-zero.
2417 size==0 is allowed, and in that case {ptr,size} considered to be zero. */
2419 #define ASSERT_MPN_ZERO_P(ptr,size) \
2422 ASSERT ((size) >= 0); \
2423 for (__i = 0; __i < (size); __i++) \
2424 ASSERT ((ptr)[__i] == 0); \
2426 #define ASSERT_MPN_NONZERO_P(ptr,size) \
2429 int __nonzero = 0; \
2430 ASSERT ((size) >= 0); \
2431 for (__i = 0; __i < (size); __i++) \
2432 if ((ptr)[__i] != 0) \
2437 ASSERT (__nonzero); \
2440 #define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
2441 #define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0)
2445 #if ! HAVE_NATIVE_mpn_com
2447 #define mpn_com(d,s,n) \
2450 mp_srcptr __s = (s); \
2451 mp_size_t __n = (n); \
2452 ASSERT (__n >= 1); \
2453 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
2455 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
2460 #define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \
2462 mp_srcptr __up = (up); \
2463 mp_srcptr __vp = (vp); \
2464 mp_ptr __rp = (rp); \
2465 mp_size_t __n = (n); \
2466 mp_limb_t __a, __b; \
2468 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \
2469 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \
2477 __rp[__n] = operation; \
2482 #if ! HAVE_NATIVE_mpn_and_n
2484 #define mpn_and_n(rp, up, vp, n) \
2485 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2488 #if ! HAVE_NATIVE_mpn_andn_n
2490 #define mpn_andn_n(rp, up, vp, n) \
2491 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2494 #if ! HAVE_NATIVE_mpn_nand_n
2496 #define mpn_nand_n(rp, up, vp, n) \
2497 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2500 #if ! HAVE_NATIVE_mpn_ior_n
2502 #define mpn_ior_n(rp, up, vp, n) \
2503 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2506 #if ! HAVE_NATIVE_mpn_iorn_n
2508 #define mpn_iorn_n(rp, up, vp, n) \
2509 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2512 #if ! HAVE_NATIVE_mpn_nior_n
2514 #define mpn_nior_n(rp, up, vp, n) \
2515 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2518 #if ! HAVE_NATIVE_mpn_xor_n
2520 #define mpn_xor_n(rp, up, vp, n) \
2521 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2524 #if ! HAVE_NATIVE_mpn_xnor_n
2526 #define mpn_xnor_n(rp, up, vp, n) \
2527 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
/* mpn_trialdiv: by name, trial division of an mpn operand against small
   primes; the returned limb and the int* out-parameter's meaning are
   defined in mpn/generic/trialdiv.c -- NOTE(review): confirm there.  */
#define mpn_trialdiv __MPN(trialdiv)
__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);

/* mpn_remove: presumably divides out repeated factors, returning a count
   as mp_bitcnt_t, with the final argument a cap -- NOTE(review): argument
   roles per mpn/generic/remove.c, confirm before relying on this.  */
#define mpn_remove __MPN(remove)
__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t);
2537 /* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2538 #if GMP_NAIL_BITS == 0
2539 #define ADDC_LIMB(cout, w, x, y) \
2541 mp_limb_t __x = (x); \
2542 mp_limb_t __y = (y); \
2543 mp_limb_t __w = __x + __y; \
2545 (cout) = __w < __x; \
2548 #define ADDC_LIMB(cout, w, x, y) \
2554 (w) = __w & GMP_NUMB_MASK; \
2555 (cout) = __w >> GMP_NUMB_BITS; \
2559 /* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2561 #if GMP_NAIL_BITS == 0
2562 #define SUBC_LIMB(cout, w, x, y) \
2564 mp_limb_t __x = (x); \
2565 mp_limb_t __y = (y); \
2566 mp_limb_t __w = __x - __y; \
2568 (cout) = __w > __x; \
2571 #define SUBC_LIMB(cout, w, x, y) \
2573 mp_limb_t __w = (x) - (y); \
2574 (w) = __w & GMP_NUMB_MASK; \
2575 (cout) = __w >> (GMP_LIMB_BITS-1); \
2580 /* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2581 expecting no carry (or borrow) from that.
2583 The size parameter is only for the benefit of assertion checking. In a
2584 normal build it's unused and the carry/borrow is just propagated as far
2587 On random data, usually only one or two limbs of {ptr,size} get updated,
2588 so there's no need for any sophisticated looping, just something compact
2591 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2592 declaring their operand sizes, then remove the former. This is purely
2593 for the benefit of assertion checking. */
2595 #if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \
2596 && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
2598 /* Better flags handling than the generic C gives on i386, saving a few
2599 bytes of code and maybe a cycle or two. */
2601 #define MPN_IORD_U(ptr, incr, aors) \
2603 mp_ptr __ptr_dummy; \
2604 if (__builtin_constant_p (incr) && (incr) == 0) \
2607 else if (__builtin_constant_p (incr) && (incr) == 1) \
2609 __asm__ __volatile__ \
2610 ("\n" ASM_L(top) ":\n" \
2611 "\t" aors "\t$1, (%0)\n" \
2612 "\tlea\t%c2(%0), %0\n" \
2613 "\tjc\t" ASM_L(top) \
2614 : "=r" (__ptr_dummy) \
2615 : "0" (ptr), "n" (sizeof(mp_limb_t)) \
2620 __asm__ __volatile__ \
2621 ( aors "\t%2, (%0)\n" \
2622 "\tjnc\t" ASM_L(done) "\n" \
2624 "\t" aors "\t$1, %c3(%0)\n" \
2625 "\tlea\t%c3(%0), %0\n" \
2626 "\tjc\t" ASM_L(top) "\n" \
2628 : "=r" (__ptr_dummy) \
2630 "ri" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \
2635 #if GMP_LIMB_BITS == 32
2636 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
2637 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
2639 #if GMP_LIMB_BITS == 64
2640 #define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq")
2641 #define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq")
2643 #define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
2644 #define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
2647 #if GMP_NAIL_BITS == 0
2649 #define mpn_incr_u(p,incr) \
2653 if (__builtin_constant_p (incr) && (incr) == 1) \
2655 while (++(*(__p++)) == 0) \
2660 __x = *__p + (incr); \
2663 while (++(*(++__p)) == 0) \
2669 #define mpn_decr_u(p,incr) \
2673 if (__builtin_constant_p (incr) && (incr) == 1) \
2675 while ((*(__p++))-- == 0) \
2681 *__p = __x - (incr); \
2683 while ((*(++__p))-- == 0) \
2690 #if GMP_NAIL_BITS >= 1
2692 #define mpn_incr_u(p,incr) \
2696 if (__builtin_constant_p (incr) && (incr) == 1) \
2700 __x = (*__p + 1) & GMP_NUMB_MASK; \
2707 __x = (*__p + (incr)); \
2708 *__p++ = __x & GMP_NUMB_MASK; \
2709 if (__x >> GMP_NUMB_BITS != 0) \
2713 __x = (*__p + 1) & GMP_NUMB_MASK; \
2722 #define mpn_decr_u(p,incr) \
2726 if (__builtin_constant_p (incr) && (incr) == 1) \
2731 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2737 __x = *__p - (incr); \
2738 *__p++ = __x & GMP_NUMB_MASK; \
2739 if (__x >> GMP_NUMB_BITS != 0) \
2744 *__p++ = (__x - 1) & GMP_NUMB_MASK; \
2755 #define MPN_INCR_U(ptr, size, n) \
2757 ASSERT ((size) >= 1); \
2758 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
2761 #define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
2767 #define MPN_DECR_U(ptr, size, n) \
2769 ASSERT ((size) >= 1); \
2770 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
2773 #define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
2778 /* Structure for conversion between internal binary format and strings. */
2781 /* Number of digits in the conversion base that always fits in an mp_limb_t.
2782 For example, for base 10 on a machine where a mp_limb_t has 32 bits this
2783 is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
2786 /* log(2)/log(conversion_base) */
2789 /* log(conversion_base)/log(2) */
2792 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2793 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
2794 i.e. the number of bits used to represent each digit in the base. */
2797 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2798 fixed-point number. Instead of dividing by big_base an application can
2799 choose to multiply by big_base_inverted. */
2800 mp_limb_t big_base_inverted;
/* Per-base conversion data (struct bases above), indexed directly by the
   base value; 257 entries cover bases up to 256.  MPN_SIZEINBASE asserts
   base >= 2, so the low entries are effectively unused.  */
#define mp_bases __MPN(bases)
__GMP_DECLSPEC extern const struct bases mp_bases[257];
2807 /* Compute the number of digits in base for nbits bits, making sure the result
2808 is never too small. The two variants of the macro implement the same
2809 function; the GT2 variant below works just for bases > 2. */
2810 #define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \
2812 mp_limb_t _ph, _dummy; \
2813 size_t _nbits = (nbits); \
2814 umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \
2815 _ph += (_dummy + _nbits < _dummy); \
2818 #define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \
2820 mp_limb_t _ph, _dummy; \
2821 size_t _nbits = (nbits); \
2822 umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \
2826 /* For power of 2 bases this is exact. For other bases the result is either
2827 exact or one too big.
2829 To be exact always it'd be necessary to examine all the limbs of the
2830 operand, since numbers like 100..000 and 99...999 generally differ only
2831 in the lowest limb. It'd be possible to examine just a couple of high
2832 limbs to increase the probability of being exact, but that doesn't seem
2833 worth bothering with. */
2835 #define MPN_SIZEINBASE(result, ptr, size, base) \
2837 int __lb_base, __cnt; \
2840 ASSERT ((size) >= 0); \
2841 ASSERT ((base) >= 2); \
2842 ASSERT ((base) < numberof (mp_bases)); \
2844 /* Special case for X == 0. */ \
2849 /* Calculate the total number of significant bits of X. */ \
2850 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2851 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2853 if (POW2_P (base)) \
2855 __lb_base = mp_bases[base].big_base; \
2856 (result) = (__totbits + __lb_base - 1) / __lb_base; \
2860 DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \
2865 #define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \
2868 mp_bitcnt_t __totbits; \
2869 ASSERT ((size) > 0); \
2870 ASSERT ((ptr)[(size)-1] != 0); \
2871 count_leading_zeros (__cnt, (ptr)[(size)-1]); \
2872 __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
2873 (result) = (__totbits + (base2exp)-1) / (base2exp); \
/* Convert a count of bits to the number of whole limbs needed to hold
   them, rounding upwards; BITS_TO_LIMBS(0) is 0.  n is evaluated once.  */
#define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2880 /* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake
2881 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs
2882 in both cases (LIMBS_PER_ULONG is also defined here.) */
2883 #if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2885 #define LIMBS_PER_ULONG 1
2886 #define MPN_SET_UI(zp, zn, u) \
2888 (zn) = ((zp)[0] != 0);
2889 #define MPZ_FAKE_UI(z, zp, u) \
2892 SIZ (z) = ((zp)[0] != 0); \
2893 ASSERT_CODE (ALLOC (z) = 1);
2895 #else /* need two limbs per ulong */
2897 #define LIMBS_PER_ULONG 2
2898 #define MPN_SET_UI(zp, zn, u) \
2899 (zp)[0] = (u) & GMP_NUMB_MASK; \
2900 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2901 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
2902 #define MPZ_FAKE_UI(z, zp, u) \
2903 (zp)[0] = (u) & GMP_NUMB_MASK; \
2904 (zp)[1] = (u) >> GMP_NUMB_BITS; \
2905 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
2907 ASSERT_CODE (ALLOC (z) = 2);
2912 #if HAVE_HOST_CPU_FAMILY_x86
2913 #define TARGET_REGISTER_STARVED 1
2915 #define TARGET_REGISTER_STARVED 0
2919 /* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
2920 or 0 there into a limb 0xFF..FF or 0 respectively.
2922 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
2923 but C99 doesn't guarantee signed right shifts are arithmetic, so we have
2924 a little compile-time test and a fallback to a "? :" form. The latter is
2925 necessary for instance on Cray vector systems.
2927 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
2928 to an arithmetic right shift anyway, but it's good to get the desired
2929 shift on past versions too (in particular since an important use of
2930 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
2932 #define LIMB_HIGHBIT_TO_MASK(n) \
2933 (((mp_limb_signed_t) -1 >> 1) < 0 /* compile-time probe: is signed >> arithmetic? */ \
2934 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) /* yes: single arithmetic shift */ \
2935 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0)) /* no: portable "? :" fallback */
2938 /* Use a library function for invert_limb, if available. */
2939 #define mpn_invert_limb __MPN(invert_limb)
2940 __GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
2941 #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
2942 #define invert_limb(invxl,xl) \
2944 (invxl) = mpn_invert_limb (xl); \
2949 #define invert_limb(invxl,xl) \
2952 ASSERT ((xl) != 0); \
2953 udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \
2957 #define invert_pi1(dinv, d1, d0) \
2959 mp_limb_t _v, _p, _t1, _t0, _mask; \
2960 invert_limb (_v, d1); \
2966 _mask = -(mp_limb_t) (_p >= (d1)); \
2969 _p -= _mask & (d1); \
2971 umul_ppmm (_t1, _t0, d0, _v); \
2976 if (UNLIKELY (_p >= (d1))) \
2978 if (_p > (d1) || _t0 >= (d0)) \
2982 (dinv).inv32 = _v; \
2986 /* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
2987 We write things strangely below, to help gcc. A more straightforward
2989 _r = (nl) - _qh * (d);
2996 For one operation shorter critical path, one may want to use this form:
3007 #define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
3009 mp_limb_t _qh, _ql, _r, _mask; \
3010 umul_ppmm (_qh, _ql, (nh), (di)); \
3011 if (__builtin_constant_p (nl) && (nl) == 0) \
3015 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3017 _r += _mask & (d); \
3021 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
3022 _r = (nl) - _qh * (d); \
3023 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3025 _r += _mask & (d); \
3026 if (UNLIKELY (_r >= (d))) \
3036 /* Dividing (NH, NL) by D, returning the remainder only. Unlike
3037 udiv_qrnnd_preinv, works also for the case NH == D, where the
3038 quotient doesn't quite fit in a single limb. */
3039 #define udiv_rnnd_preinv(r, nh, nl, d, di) \
3041 mp_limb_t _qh, _ql, _r, _mask; \
3042 umul_ppmm (_qh, _ql, (nh), (di)); \
3043 if (__builtin_constant_p (nl) && (nl) == 0) \
3045 _r = ~(_qh + (nh)) * (d); \
3046 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3047 _r += _mask & (d); \
3051 add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
3052 _r = (nl) - _qh * (d); \
3053 _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
3054 _r += _mask & (d); \
3055 if (UNLIKELY (_r >= (d))) \
3061 /* Compute the quotient and remainder for n / d. Requires d
3062 >= B^2 / 2 and n < d B. di is the inverse
3064 floor ((B^3 - 1) / (d0 + d1 B)) - B.
3066 NOTE: Output variables are updated multiple times. Only some inputs
3067 and outputs may overlap.
3069 #define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
3071 mp_limb_t _q0, _t1, _t0, _mask; \
3072 umul_ppmm ((q), _q0, (n2), (dinv)); \
3073 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
3075 /* Compute the two most significant limbs of n - q'd */ \
3076 (r1) = (n1) - (d1) * (q); \
3077 sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
3078 umul_ppmm (_t1, _t0, (d0), (q)); \
3079 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
3082 /* Conditionally adjust q and the remainders */ \
3083 _mask = - (mp_limb_t) ((r1) >= _q0); \
3085 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
3086 if (UNLIKELY ((r1) >= (d1))) \
3088 if ((r1) > (d1) || (r0) >= (d0)) \
3091 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
3096 #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
3097 #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
3098 __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
3102 /* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
3103 plain mpn_divrem_1. The default is yes, since the few CISC chips where
3104 preinv is not good have defines saying so. */
3105 #ifndef USE_PREINV_DIVREM_1
3106 #define USE_PREINV_DIVREM_1 1
3109 #if USE_PREINV_DIVREM_1
3110 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
3111 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
3113 #define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \
3114 mpn_divrem_1 (qp, xsize, ap, size, d)
3117 #ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
3118 #define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
3121 /* This selection may seem backwards. The reason mpn_mod_1 typically takes
3122 over for larger sizes is that it uses the mod_1_1 function. */
3123 #define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
3124 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
3125 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \
3126 : mpn_mod_1 (src, size, divisor))
3129 #ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
3130 #define mpn_mod_34lsub1 __MPN(mod_34lsub1)
3131 __GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
3135 /* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
3136 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for
3137 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and
3138 modexact are faster at all sizes, so the defaults are 0. Those CPUs
3139 where this is not right have a tuned threshold. */
3140 #ifndef DIVEXACT_1_THRESHOLD
3141 #define DIVEXACT_1_THRESHOLD 0
3143 #ifndef BMOD_1_TO_MOD_1_THRESHOLD
3144 #define BMOD_1_TO_MOD_1_THRESHOLD 10
3147 #ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
3148 #define mpn_divexact_1 __MPN(divexact_1)
3149 __GMP_DECLSPEC void mpn_divexact_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
3152 #define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \
3154 if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \
3155 ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \
3158 ASSERT (mpn_mod_1 (up, n, d) == 0); \
3159 mpn_divexact_1 (rp, up, n, d); \
3163 #ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
3164 #define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
3165 __GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3168 #if HAVE_NATIVE_mpn_modexact_1_odd
3169 #define mpn_modexact_1_odd __MPN(modexact_1_odd)
3170 __GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3172 #define mpn_modexact_1_odd(src,size,divisor) \
3173 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
3176 #define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \
3177 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \
3178 ? mpn_modexact_1_odd (src, size, divisor) \
3179 : mpn_mod_1 (src, size, divisor))
3181 /* binvert_limb() sets inv to the multiplicative inverse of n modulo
3182 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
3183 n must be odd (otherwise such an inverse doesn't exist).
3185 This is not to be confused with invert_limb(), which is completely
3188 The table lookup gives an inverse with the low 8 bits valid, and each
3189 multiply step doubles the number of bits. See Jebelean "An algorithm for
3190 exact division" end of section 4 (reference in gmp.texi).
3192 Possible enhancement: Could use UHWtype until the last step, if half-size
3193 multiplies are faster (might help under _LONG_LONG_LIMB).
3195 Alternative: As noted in Granlund and Montgomery "Division by Invariant
3196 Integers using Multiplication" (reference in gmp.texi), n itself gives a
3197 3-bit inverse immediately, and could be used instead of a table lookup.
3198 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
3199 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */
3201 #define binvert_limb_table __gmp_binvert_limb_table
3202 __GMP_DECLSPEC extern const unsigned char binvert_limb_table[128];
3204 #define binvert_limb(inv,n) \
3206 mp_limb_t __n = (n); \
3208 ASSERT ((__n & 1) == 1); \
3210 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \
3211 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
3212 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
3213 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
3215 if (GMP_NUMB_BITS > 64) \
3217 int __invbits = 64; \
3219 __inv = 2 * __inv - __inv * __inv * __n; /* Newton step: doubles the valid bits */ \
3221 } while (__invbits < GMP_NUMB_BITS); \
3224 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \
3225 (inv) = __inv & GMP_NUMB_MASK; \
3227 #define modlimb_invert binvert_limb /* backward compatibility */
3229 /* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
3230 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
3231 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
3232 we need to start from GMP_NUMB_MAX>>1. */
3233 #define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
3235 /* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
3236 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
3237 #define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1)
3238 #define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
3241 /* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs.
3243 It's not clear whether this is the best way to do this calculation.
3244 Anything congruent to -a would be fine for the one limb congruence
3247 #define NEG_MOD(r, a, d) \
3249 ASSERT ((d) != 0); \
3255 /* small a is reasonably likely */ \
3261 mp_limb_t __dnorm; \
3262 count_leading_zeros (__twos, d); \
3263 __twos -= GMP_NAIL_BITS; \
3264 __dnorm = (d) << __twos; \
3265 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
/* A bit mask of all the least significant zero bits of n, or -1 if n==0.
   Uses the two's complement identity ~n & (n-1), which keeps exactly the
   bits below the lowest set bit of n (n evaluated twice, as before). */
#define LOW_ZEROS_MASK(n)  (~(n) & ((n) - 1))
3275 /* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
3276 to 0 if there's an even number. "n" should be an unsigned long and "p"
3279 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3280 #define ULONG_PARITY(p, n) \
3283 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \
3288 /* Cray intrinsic _popcnt. */
3290 #define ULONG_PARITY(p, n) \
3292 (p) = _popcnt (n) & 1; \
3296 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3297 && ! defined (NO_ASM) && defined (__ia64)
3298 /* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
3299 to a 64 bit unsigned long long for popcnt */
3300 #define ULONG_PARITY(p, n) \
3302 unsigned long long __n = (unsigned long) (n); \
3304 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
3309 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3310 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3311 #if __GMP_GNUC_PREREQ (3,1)
3312 #define __GMP_qm "=Qm"
3313 #define __GMP_q "=Q"
3315 #define __GMP_qm "=qm"
3316 #define __GMP_q "=q"
3318 #define ULONG_PARITY(p, n) \
3321 unsigned long __n = (n); \
3322 __n ^= (__n >> 16); \
3323 __asm__ ("xorb %h1, %b1\n\t" \
3325 : __GMP_qm (__p), __GMP_q (__n) \
3331 #if ! defined (ULONG_PARITY)
3332 #define ULONG_PARITY(p, n) \
3334 unsigned long __n = (n); \
3338 __p ^= 0x96696996L >> (__n & 0x1F); \
3348 /* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of
3349 version 3.1 at least) doesn't seem to know how to generate rlwimi for
3350 anything other than bit-fields, so use "asm". */
3351 #if defined (__GNUC__) && ! defined (NO_ASM) \
3352 && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3353 #define BSWAP_LIMB(dst, src) \
3355 mp_limb_t __bswapl_src = (src); \
3356 mp_limb_t __tmp1 = __bswapl_src >> 24; /* low byte */ \
3357 mp_limb_t __tmp2 = __bswapl_src << 24; /* high byte */ \
3358 __asm__ ("rlwimi %0, %2, 24, 16, 23" /* 2nd low */ \
3359 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
3360 __asm__ ("rlwimi %0, %2, 8, 8, 15" /* 2nd high */ \
3361 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
3362 (dst) = __tmp1 | __tmp2; /* whole */ \
3366 /* bswap is available on i486 and up and is fast. A combination rorw $8 /
3367 roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3368 kernel with xchgb instead of rorw), but this is not done here, because
3369 i386 means generic x86 and mixing word and dword operations will cause
3370 partial register stalls on P6 chips. */
3371 #if defined (__GNUC__) && ! defined (NO_ASM) \
3372 && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
3373 && GMP_LIMB_BITS == 32
3374 #define BSWAP_LIMB(dst, src) \
3376 __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
3380 #if defined (__GNUC__) && ! defined (NO_ASM) \
3381 && defined (__amd64__) && GMP_LIMB_BITS == 64
3382 #define BSWAP_LIMB(dst, src) \
3384 __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
3388 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3389 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3390 #define BSWAP_LIMB(dst, src) \
3392 __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
3397 #if defined (__GNUC__) && ! defined (NO_ASM) \
3398 && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3399 #define BSWAP_LIMB(dst, src) \
3401 mp_limb_t __bswapl_src = (src); \
3402 __asm__ ("ror%.w %#8, %0\n\t" \
3406 : "0" (__bswapl_src)); \
3410 #if ! defined (BSWAP_LIMB)
3411 #if GMP_LIMB_BITS == 8
3412 #define BSWAP_LIMB(dst, src) \
3413 do { (dst) = (src); } while (0)
3415 #if GMP_LIMB_BITS == 16
3416 #define BSWAP_LIMB(dst, src) \
3418 (dst) = ((src) << 8) + ((src) >> 8); \
3421 #if GMP_LIMB_BITS == 32
3422 #define BSWAP_LIMB(dst, src) \
3426 + (((src) & 0xFF00) << 8) \
3427 + (((src) >> 8) & 0xFF00) \
3431 #if GMP_LIMB_BITS == 64
3432 #define BSWAP_LIMB(dst, src) \
3436 + (((src) & 0xFF00) << 40) \
3437 + (((src) & 0xFF0000) << 24) \
3438 + (((src) & 0xFF000000) << 8) \
3439 + (((src) >> 8) & 0xFF000000) \
3440 + (((src) >> 24) & 0xFF0000) \
3441 + (((src) >> 40) & 0xFF00) \
3447 #if ! defined (BSWAP_LIMB)
3448 #define BSWAP_LIMB(dst, src) \
3450 mp_limb_t __bswapl_src = (src); \
3451 mp_limb_t __dstl = 0; \
3453 for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
3455 __dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \
3456 __bswapl_src >>= 8; \
3463 /* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3464 those we know are fast. */
3465 #if defined (__GNUC__) && ! defined (NO_ASM) \
3466 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3467 && (HAVE_HOST_CPU_powerpc604 \
3468 || HAVE_HOST_CPU_powerpc604e \
3469 || HAVE_HOST_CPU_powerpc750 \
3470 || HAVE_HOST_CPU_powerpc7400)
3471 #define BSWAP_LIMB_FETCH(limb, src) \
3473 mp_srcptr __blf_src = (src); \
3475 __asm__ ("lwbrx %0, 0, %1" \
3477 : "r" (__blf_src), \
3478 "m" (*__blf_src)); \
3483 #if ! defined (BSWAP_LIMB_FETCH)
3484 #define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
3488 /* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3489 know are fast. FIXME: Is this necessary? */
3490 #if defined (__GNUC__) && ! defined (NO_ASM) \
3491 && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
3492 && (HAVE_HOST_CPU_powerpc604 \
3493 || HAVE_HOST_CPU_powerpc604e \
3494 || HAVE_HOST_CPU_powerpc750 \
3495 || HAVE_HOST_CPU_powerpc7400)
3496 #define BSWAP_LIMB_STORE(dst, limb) \
3498 mp_ptr __dst = (dst); \
3499 mp_limb_t __limb = (limb); \
3500 __asm__ ("stwbrx %1, 0, %2" \
3507 #if ! defined (BSWAP_LIMB_STORE)
3508 #define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
3512 /* Byte swap limbs from {src,size} and store at {dst,size}. */
3513 #define MPN_BSWAP(dst, src, size) \
3515 mp_ptr __dst = (dst); \
3516 mp_srcptr __src = (src); \
3517 mp_size_t __size = (size); \
3519 ASSERT ((size) >= 0); \
3520 ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
3521 CRAY_Pragma ("_CRI ivdep"); \
3522 for (__i = 0; __i < __size; __i++) \
3524 BSWAP_LIMB_FETCH (*__dst, __src); \
3530 /* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3531 #define MPN_BSWAP_REVERSE(dst, src, size) \
3533 mp_ptr __dst = (dst); \
3534 mp_size_t __size = (size); \
3535 mp_srcptr __src = (src) + __size - 1; \
3537 ASSERT ((size) >= 0); \
3538 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
3539 CRAY_Pragma ("_CRI ivdep"); \
3540 for (__i = 0; __i < __size; __i++) \
3542 BSWAP_LIMB_FETCH (*__dst, __src); \
3549 /* No processor claiming to be SPARC v9 compliant seems to
3550 implement the POPC instruction. Disable pattern for now. */
3552 #if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3553 #define popc_limb(result, input) \
3556 __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
3561 #if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3562 #define popc_limb(result, input) \
3564 __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
3568 /* Cray intrinsic. */
3570 #define popc_limb(result, input) \
3572 (result) = _popcnt (input); \
3576 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
3577 && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3578 #define popc_limb(result, input) \
3580 __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
3584 /* Cool population count of an mp_limb_t.
3585 You have to figure out how this works; we won't tell you!
3587 The constants could also be expressed as:
3588 0x55... = [2^N / 3] = [(2^N-1)/3]
3589 0x33... = [2^N / 5] = [(2^N-1)/5]
3590 0x0f... = [2^N / 17] = [(2^N-1)/17]
3591 (N is GMP_LIMB_BITS, [] denotes truncation.) */
3593 #if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3594 #define popc_limb(result, input) \
3596 mp_limb_t __x = (input); \
3597 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3598 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3599 __x = ((__x >> 4) + __x); \
3600 (result) = __x & 0x0f; \
3604 #if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3605 #define popc_limb(result, input) \
3607 mp_limb_t __x = (input); \
3608 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3609 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3610 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3611 __x = ((__x >> 8) + __x); \
3612 (result) = __x & 0xff; \
3616 #if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3617 #define popc_limb(result, input) \
3619 mp_limb_t __x = (input); \
3620 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3621 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3622 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3623 __x = ((__x >> 8) + __x); \
3624 __x = ((__x >> 16) + __x); \
3625 (result) = __x & 0xff; \
3629 #if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3630 #define popc_limb(result, input) \
3632 mp_limb_t __x = (input); \
3633 __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
3634 __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
3635 __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
3636 __x = ((__x >> 8) + __x); \
3637 __x = ((__x >> 16) + __x); \
3638 __x = ((__x >> 32) + __x); \
3639 (result) = __x & 0xff; \
3644 /* Define stuff for longlong.h. */
3645 #if HAVE_ATTRIBUTE_MODE
3646 typedef unsigned int UQItype __attribute__ ((mode (QI)));
3647 typedef int SItype __attribute__ ((mode (SI)));
3648 typedef unsigned int USItype __attribute__ ((mode (SI)));
3649 typedef int DItype __attribute__ ((mode (DI)));
3650 typedef unsigned int UDItype __attribute__ ((mode (DI)));
3652 typedef unsigned char UQItype;
3653 typedef long SItype;
3654 typedef unsigned long USItype;
3656 typedef long long int DItype;
3657 typedef unsigned long long int UDItype;
3658 #else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
3659 typedef long int DItype;
3660 typedef unsigned long int UDItype;
3664 typedef mp_limb_t UWtype;
3665 typedef unsigned int UHWtype;
3666 #define W_TYPE_SIZE GMP_LIMB_BITS
3668 /* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3670 Bit field packing is "implementation defined" according to C99, which
3671 leaves us at the compiler's mercy here. For some systems packing is
3672 defined in the ABI (eg. x86). In any case so far it seems universal that
3673 little endian systems pack from low to high, and big endian from high to
3674 low within the given type.
3676 Within the fields we rely on the integer endianness being the same as the
3677 float endianness, this is true everywhere we know of and it'd be a fairly
3678 strange system that did anything else. */
3680 #if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3681 #define _GMP_IEEE_FLOATS 1
3682 union ieee_double_extract
3686 gmp_uint_least32_t manh:20;
3687 gmp_uint_least32_t exp:11;
3688 gmp_uint_least32_t sig:1;
3689 gmp_uint_least32_t manl:32;
3695 #if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3696 #define _GMP_IEEE_FLOATS 1
3697 union ieee_double_extract
3701 gmp_uint_least32_t manl:32;
3702 gmp_uint_least32_t manh:20;
3703 gmp_uint_least32_t exp:11;
3704 gmp_uint_least32_t sig:1;
3710 #if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3711 #define _GMP_IEEE_FLOATS 1
3712 union ieee_double_extract
3716 gmp_uint_least32_t sig:1;
3717 gmp_uint_least32_t exp:11;
3718 gmp_uint_least32_t manh:20;
3719 gmp_uint_least32_t manl:32;
3725 #if HAVE_DOUBLE_VAX_D
3726 union double_extract
3730 gmp_uint_least32_t man3:7; /* highest 7 bits */
3731 gmp_uint_least32_t exp:8; /* excess-128 exponent */
3732 gmp_uint_least32_t sig:1;
3733 gmp_uint_least32_t man2:16;
3734 gmp_uint_least32_t man1:16;
3735 gmp_uint_least32_t man0:16; /* lowest 16 bits */
3741 /* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3742 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
3743 #define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3744 /* Maximum number of limbs it will take to store any `double'.
3745 We assume doubles have 53 mantissa bits. */
/* NOTE(review): the trailing "+ 1" presumably provides a spare limb so the
   53 mantissa bits may sit at any bit offset within the limb array --
   confirm against __gmp_extract_double. */
3746 #define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
3748 __GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
3750 #define mpn_get_d __gmpn_get_d
3751 __GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
3754 /* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3755 a_inf if x is an infinity. Both are considered unlikely values, for
3756 branch prediction. */
3758 #if _GMP_IEEE_FLOATS
3759 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3761 union ieee_double_extract u; \
3763 if (UNLIKELY (u.s.exp == 0x7FF)) \
3765 if (u.s.manl == 0 && u.s.manh == 0) \
3773 #if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3774 /* no nans or infs in these formats */
3775 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3779 #ifndef DOUBLE_NAN_INF_ACTION
3780 /* Unknown format, try something generic.
3781 NaN should be "unordered", so x!=x.
3782 Inf should be bigger than DBL_MAX. */
3783 #define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
3786 if (UNLIKELY ((x) != (x))) \
3788 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
3794 /* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3795 in the coprocessor, which means a bigger exponent range than normal, and
3796 depending on the rounding mode, a bigger mantissa than normal. (See
3797 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches
3798 "d" through memory to force any rounding and overflows to occur.
3800 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3801 registers, where there's no such extra precision and no need for the
3802 FORCE_DOUBLE. We don't bother to detect this since the present uses for
3803 FORCE_DOUBLE are only in test programs and default generic C code.
3805 Not quite sure that an "automatic volatile" will use memory, but it does
3806 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3807 apparently matching operands like "0" are only allowed on a register
3808 output. gcc 3.4 warns about this, though in fact it and past versions
3809 seem to put the operand through memory as hoped. */
3811 #if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \
3812 || defined (__amd64__))
3813 #define FORCE_DOUBLE(d) \
3814 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3816 #define FORCE_DOUBLE(d) do { } while (0)
3820 __GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
3822 __GMP_DECLSPEC extern int __gmp_junk;
3823 __GMP_DECLSPEC extern const int __gmp_0;
3824 __GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
3825 __GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
3826 __GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
3827 __GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
3828 #define GMP_ERROR(code) __gmp_exception (code)
3829 #define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
3830 #define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
/* CNST_LIMB(C) forms an mp_limb_t constant from the literal C, appending
   the integer suffix (LL or L) that matches the underlying limb type. */
3832 #if defined _LONG_LONG_LIMB
3833 #define CNST_LIMB(C) ((mp_limb_t) C##LL)
3834 #else /* not _LONG_LONG_LIMB */
3835 #define CNST_LIMB(C) ((mp_limb_t) C##L)
3836 #endif /* _LONG_LONG_LIMB */
3838 /* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3839 #if GMP_NUMB_BITS == 2
3840 #define PP 0x3 /* 3 */
3841 #define PP_FIRST_OMITTED 5
3843 #if GMP_NUMB_BITS == 4
3844 #define PP 0xF /* 3 x 5 */
3845 #define PP_FIRST_OMITTED 7
3847 #if GMP_NUMB_BITS == 8
3848 #define PP 0x69 /* 3 x 5 x 7 */
3849 #define PP_FIRST_OMITTED 11
3851 #if GMP_NUMB_BITS == 16
3852 #define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */
3853 #define PP_FIRST_OMITTED 17
3855 #if GMP_NUMB_BITS == 32
3856 #define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */
3857 #define PP_INVERTED 0x53E5645CL
3858 #define PP_FIRST_OMITTED 31
3860 #if GMP_NUMB_BITS == 64
3861 #define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */
3862 #define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3863 #define PP_FIRST_OMITTED 59
3865 #ifndef PP_FIRST_OMITTED
3866 #define PP_FIRST_OMITTED 3
3869 /* BIT1 means a result value in bit 1 (second least significant bit), with a
3870 zero bit representing +1 and a one bit representing -1. Bits other than
3871 bit 1 are garbage. These are meant to be kept in "int"s, and casts are
3872 used to ensure the expressions are "int"s even if a and/or b might be
3875 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3876 and their speed is important. Expressions are used rather than
3877 conditionals to accumulate sign changes, which effectively means XORs
3878 instead of conditional JUMPs. */
3880 /* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3881 #define JACOBI_S0(a) (((a) == 1) | ((a) == -1))
3883 /* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3884 #define JACOBI_U0(a) ((a) == 1)
3886 /* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
3887 come up with a better name. */
3889 /* (a/0), with a given by low and size;
3890 is 1 if a=+/-1, 0 otherwise */
3891 #define JACOBI_LS0(alow,asize) \
3892 (((asize) == 1 || (asize) == -1) && (alow) == 1)
3894 /* (a/0), with a an mpz_t;
3895 fetch of low limb always valid, even if size is zero */
3896 #define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a))
3898 /* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
3899 #define JACOBI_0U(b) ((b) == 1)
3901 /* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
3902 #define JACOBI_0S(b) ((b) == 1 || (b) == -1)
3904 /* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
3905 #define JACOBI_0LS(blow,bsize) \
3906 (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
/* Convert a bit1-encoded Jacobi result to +1 or -1: bit 1 set means -1,
   clear means +1; all other bits are ignored (garbage). The argument is
   evaluated exactly once, as in the original form. */
#define JACOBI_BIT1_TO_PN(result_bit1) \
  (((int) (result_bit1) & 2) ? -1 : 1)
3912 /* (2/b), with b unsigned and odd;
3913 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
3914 hence obtained from (b>>1)^b */
3915 #define JACOBI_TWO_U_BIT1(b) \
3916 ((int) (((b) >> 1) ^ (b)))
3918 /* (2/b)^twos, with b unsigned and odd */
3919 #define JACOBI_TWOS_U_BIT1(twos, b) \
3920 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
3922 /* (2/b)^twos, with b unsigned and odd */
3923 #define JACOBI_TWOS_U(twos, b) \
3924 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
3926 /* (-1/b), with b odd (signed or unsigned);
3927 is (-1)^((b-1)/2) */
3928 #define JACOBI_N1B_BIT1(b) \
3931 /* (a/b) effect due to sign of a: signed/unsigned, b odd;
3932 is (-1/b) if a<0, or +1 if a>=0 */
3933 #define JACOBI_ASGN_SU_BIT1(a, b) \
3934 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
3936 /* (a/b) effect due to sign of b: signed/signed;
3937 is -1 if a and b both negative, +1 otherwise */
3938 #define JACOBI_BSGN_SS_BIT1(a, b) \
3939 ((((a)<0) & ((b)<0)) << 1)
3941 /* (a/b) effect due to sign of b: signed/mpz;
3942 is -1 if a and b both negative, +1 otherwise */
3943 #define JACOBI_BSGN_SZ_BIT1(a, b) \
3944 JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
3946 /* (a/b) effect due to sign of b: mpz/signed;
3947 is -1 if a and b both negative, +1 otherwise */
3948 #define JACOBI_BSGN_ZS_BIT1(a, b) \
3949 JACOBI_BSGN_SZ_BIT1 (b, a)
/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
   because this is used in a couple of places with only bit 1 of a or b
   valid.  */
/* NOTE(review): the macro body continuation line(s) are not visible in
   this excerpt.  */
#define JACOBI_RECIP_UU_BIT1(a, b) \

/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
   decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
   updated for the new b_ptr.  result_bit1 is updated according to the
   factors of 2 stripped, as per (a/2).
   NOTE(review): the do{...}while(0) wrapper and some interior lines are
   not visible in this excerpt.  */
#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
    ASSERT ((b_size) >= 1); \
    ASSERT ((b_low) == (b_ptr)[0]); \
    /* each whole zero limb stripped is GMP_NUMB_BITS factors of 2 */ \
    while (UNLIKELY ((b_low) == 0)) \
        ASSERT ((b_size) >= 1); \
        (b_low) = *(b_ptr); \
    ASSERT (((a) & 1) != 0); \
    /* an odd number of twos per limb only matters for odd GMP_NUMB_BITS */ \
    if ((GMP_NUMB_BITS % 2) == 1) \
      (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \

/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   unsigned.  modexact_1_odd effectively calculates -a mod b, and
   result_bit1 is adjusted for the factor of -1.

   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
   factor to introduce into result_bit1, so for that case use mpn_mod_1
   unconditionally.

   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   or not skip a divide step, or something. */
/* NOTE(review): the do{...}while(0) wrapper and the if/else framing lines
   are not visible in this excerpt.  */
#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
    mp_srcptr  __a_ptr  = (a_ptr); \
    mp_size_t  __a_size = (a_size); \
    mp_limb_t  __b      = (b); \
    ASSERT (__a_size >= 1); \
    if ((GMP_NUMB_BITS % 2) != 0 \
        || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
      (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
      /* modexact computes -a mod b, so fold the (-1/b) factor in */ \
      (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
      (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
/* State for the Jacobi computation using Lehmer. */
#define jacobi_table __gmp_jacobi_table
__GMP_DECLSPEC extern const unsigned char jacobi_table[208];

/* Bit layout for the initial state.  b must be odd.
   NOTE(review): the rest of this layout description is not visible in
   this excerpt; presumably bits 2-3 hold a mod 4, bit 1 holds bit 1 of
   b, bit 0 the sign flag s — confirm against the full source.  */
static inline unsigned
mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
  /* Pack (a mod 4, bit 1 of b, sign flag s) into one state word. */
  return ((a & 3) << 2) + (b & 2) + s;

mpn_jacobi_finish (unsigned bits)
  /* (a, b) = (1,0) or (0,1) */
  ASSERT ( (bits & 14) == 0);

  /* Map the accumulated sign bit to the Jacobi symbol value +1 or -1. */
  return 1-2*(bits & 1);

static inline unsigned
mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
  /* FIXME: Could halve table size by not including the e bit in the
   * index, and instead xor when updating.  Then the lookup would be
   * something like
   *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
   */
  ASSERT (denominator < 2);

  /* For almost all calls, denominator is constant and quite often q
     is constant too.  So use addition rather than or, so the compiler
     can put the constant part into the offset of an indexed
     addressing instruction.

     With constant denominator, the below table lookup is compiled to

       C Constant q = 1, constant denominator = 1
       movzbl table+5(%eax,8), %eax

       C q in %edx, constant denominator = 1
       movzbl table+4(%edx,%eax,8), %eax

     One could maintain the state preshifted 3 bits, to save a shift
     here, but at least on x86, that's no real saving.  */
  return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
/* Matrix multiplication of 2x2 limb matrices, used by the HGCD code. */
#define mpn_matrix22_mul __MPN(matrix22_mul)
__GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
__GMP_DECLSPEC void mpn_matrix22_mul_strassen (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t);

#ifndef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD 30
/* NOTE(review): the matching #endif is not visible in this excerpt. */
/* HGCD definitions */

/* Extract one numb, shifting count bits left

       |___xh___||___xl___|

   The count includes any nail bits, so it should work fine if count
   is computed using count_leading_zeros.  If GMP_NAIL_BITS > 0, all of
   xh, xl and r include nail bits.  Must have 0 < count < GMP_LIMB_BITS.

   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
   those calls where the count high bits of xh may be non-zero.  */

#define MPN_EXTRACT_NUMB(count, xh, xl) \
  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
   ((xl) >> (GMP_LIMB_BITS - (count))))
/* The matrix non-negative M = (u, u'; v,v') keeps track of the
   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
   than a, b.  The determinant must always be one, so that M has an
   inverse (v', -u'; -v, u).  Elements always fit in GMP_NUMB_BITS - 1
   bits.  */
/* NOTE(review): the "struct hgcd_matrix1" definition itself is not
   visible in this excerpt.  */
#define mpn_hgcd2 __MPN (hgcd2)
__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *);

#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);

#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);

#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);

/* NOTE(review): this member belongs to "struct hgcd_matrix", whose
   surrounding declaration lines are not visible in this excerpt.  */
  mp_size_t alloc;		/* for sanity checking only */

#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))

#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);

#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);

#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);

#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);

#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_hgcd_step __MPN(hgcd_step)
__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);

#define mpn_hgcd_reduce __MPN(hgcd_reduce)
__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t);

#define mpn_hgcd_itch __MPN (hgcd_itch)
__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t);

#define mpn_hgcd __MPN (hgcd)
__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);

#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t);

#define mpn_hgcd_appr __MPN (hgcd_appr)
__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);

#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
/* Callback invoked by the subdivision step with each produced quotient /
   remainder pair. */
typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);

/* Needs storage for the quotient */
#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)

#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);

/* NOTE(review): the two comments below belong to members of a context
   struct (presumably "struct gcdext_ctx") whose declaration lines are
   not visible in this excerpt.  */
/* Result parameters. */
/* Cofactors updated in each step. */

#define mpn_gcdext_hook __MPN (gcdext_hook)
gcd_subdiv_step_hook mpn_gcdext_hook;

#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)

#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

/* 4*(an + 1) + 4*(bn + 1) + an */
#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)

/* NOTE(review): the #endif lines closing the #ifndef blocks below are
   not visible in this excerpt.  */
#ifndef HGCD_THRESHOLD
#define HGCD_THRESHOLD 400

#ifndef HGCD_APPR_THRESHOLD
#define HGCD_APPR_THRESHOLD 400

#ifndef HGCD_REDUCE_THRESHOLD
#define HGCD_REDUCE_THRESHOLD 1000

#ifndef GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD 1000

#ifndef GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD 600
/* Definitions for mpn_set_str and mpn_get_str */

/* NOTE(review): these members belong to "struct powers"; the struct's
   opening and closing lines are not visible in this excerpt.  */
  mp_ptr p;			/* actual power value */
  mp_size_t n;			/* # of limbs at p */
  mp_size_t shift;		/* weight of lowest limb, in limb base B */
  size_t digits_in_base;	/* number of corresponding digits */
typedef struct powers powers_t;

/* Scratch/table sizes for divide-and-conquer radix conversion. */
#define mpn_dc_set_str_powtab_alloc(n) ((n) + GMP_LIMB_BITS)
#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
#define mpn_dc_get_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS)
#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)

#define mpn_dc_set_str __MPN(dc_set_str)
__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
#define mpn_bc_set_str __MPN(bc_set_str)
__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
#define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
__GMP_DECLSPEC void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
   hence giving back the user's size in bits rounded up.  Notice that
   converting prec->bits->prec gives an unchanged value. */
#define __GMPF_BITS_TO_PREC(n) \
  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
#define __GMPF_PREC_TO_BITS(n) \
  ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)

__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;

/* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
   down.  */
/* NOTE(review): the do{...}while(0) wrapper and the final "res = ..."
   line are not visible in this excerpt.  */
#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \
    mp_limb_t _ph, _pl; \
    umul_ppmm (_ph, _pl, \
	       mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\

/* Compute the number of limbs corresponding to ndigits base-b digits, rounding
   up.  */
#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \
    mp_limb_t _ph, _dummy; \
    umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \
    /* +2: round up, and allow for digits not falling on limb boundaries */ \
    res = 8 * _ph / GMP_NUMB_BITS + 2; \

/* Set n to the number of significant digits an mpf of the given _mp_prec
   field, in the given base.  This is a rounded up value, designed to ensure
   there's enough digits to reproduce all the guaranteed part of the value.

   There are prec many limbs, but the high might be only "1" so forget it
   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
   further +1 is because the limbs usually won't fall on digit boundaries.

   FIXME: If base is a power of 2 and the bits per digit divides
   GMP_LIMB_BITS then the +2 is unnecessary.  This happens always for
   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */

#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
    ASSERT (base >= 2 && base < numberof (mp_bases)); \
    DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \
/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
   their respective #if HAVE_FOO_H.

   GLIBC recommends nl_langinfo because getting only one facet can be
   faster, apparently. */

/* NOTE(review): the #endif lines of the conditionals below are not
   visible in this excerpt.  */
/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))

/* RADIXCHAR is deprecated, still in unix98 or some such. */
#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))

/* localeconv is slower since it returns all locale stuff */
#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (localeconv()->decimal_point)

/* final fallback: a plain "." when no locale facility is available */
#if ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT  (".")
/* Conversion styles for the formatted-output (doprnt) engine. */
#define DOPRNT_CONV_FIXED        1
#define DOPRNT_CONV_SCIENTIFIC   2
#define DOPRNT_CONV_GENERAL      3

/* Field justification choices. */
#define DOPRNT_JUSTIFY_NONE      0
#define DOPRNT_JUSTIFY_LEFT      1
#define DOPRNT_JUSTIFY_RIGHT     2
#define DOPRNT_JUSTIFY_INTERNAL  3

/* Whether to emit a base prefix (e.g. "0x"). */
#define DOPRNT_SHOWBASE_YES      1
#define DOPRNT_SHOWBASE_NO       2
#define DOPRNT_SHOWBASE_NONZERO  3

/* Parsed formatting directives for one conversion.
   NOTE(review): the closing "};" of this struct is not visible in this
   excerpt.  */
struct doprnt_params_t {
  int         base;          /* negative for upper case */
  int         conv;          /* choices above */
  const char  *expfmt;       /* exponent format */
  int         exptimes4;     /* exponent multiply by 4 */
  char        fill;          /* character */
  int         justify;       /* choices above */
  int         prec;          /* prec field, or -1 for all digits */
  int         showbase;      /* choices above */
  int         showpoint;     /* if radix point always shown */
  int         showtrailing;  /* if trailing zeros wanted */
  char        sign;          /* '+', ' ', or '\0' */
  int         width;         /* width field */
#if _GMP_H_HAVE_VA_LIST

/* Output callbacks used by the doprnt engine: formatted write, raw
   memory write, repeated character, and final cleanup. */
typedef int (*doprnt_format_t) (void *, const char *, va_list);
typedef int (*doprnt_memory_t) (void *, const char *, size_t);
typedef int (*doprnt_reps_t)   (void *, int, int);
typedef int (*doprnt_final_t)  (void *);

/* NOTE(review): the "reps" member line and the closing "};" of this
   struct are not visible in this excerpt.  */
struct doprnt_funs_t {
  doprnt_format_t  format;
  doprnt_memory_t  memory;
  doprnt_final_t   final;   /* NULL if not required */

extern const struct doprnt_funs_t  __gmp_fprintf_funs;
extern const struct doprnt_funs_t  __gmp_sprintf_funs;
extern const struct doprnt_funs_t  __gmp_snprintf_funs;
extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
extern const struct doprnt_funs_t  __gmp_ostream_funs;

/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
   "size" of these have been written.  "alloc > size" is maintained, so
   there's room to store a '\0' at the end.  "result" is where the
   application wants the final block pointer. */
/* NOTE(review): the member declaration lines of this struct are not
   visible in this excerpt.  */
struct gmp_asprintf_t {

/* Initialize d to collect output, ultimately delivered through *output. */
#define GMP_ASPRINTF_T_INIT(d, output) \
    (d).result = (output); \
    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \

/* If a realloc is necessary, use twice the size actually required, so as to
   avoid repeated small reallocs. */
#define GMP_ASPRINTF_T_NEED(d, n) \
    size_t  alloc, newsize, newalloc; \
    ASSERT ((d)->alloc >= (d)->size + 1); \
    alloc = (d)->alloc; \
    newsize = (d)->size + (n); \
    if (alloc <= newsize) \
        /* grow to double the needed size to amortize future appends */ \
        newalloc = 2*newsize; \
        (d)->alloc = newalloc; \
        (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
                                               alloc, newalloc, char); \

__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
/* buf is where to write the next output, and size is how much space is left
   there.  If the application passed size==0 then that's what we'll have
   here, and nothing at all should be written. */
/* NOTE(review): the member declaration lines of this struct are not
   visible in this excerpt.  */
struct gmp_snprintf_t {

/* Add the bytes printed by the call to the total retval, or bail out on an
   error.  */
/* NOTE(review): the macro bodies below have continuation lines not
   visible in this excerpt.  */
#define DOPRNT_ACCUMULATE(call) \

#define DOPRNT_ACCUMULATE_FUN(fun, params) \
    ASSERT ((fun) != NULL); \
    DOPRNT_ACCUMULATE ((*(fun)) params); \

#define DOPRNT_FORMAT(fmt, ap) \
  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
#define DOPRNT_MEMORY(ptr, len) \
  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
#define DOPRNT_REPS(c, n) \
  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))

#define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))

#define DOPRNT_REPS_MAYBE(c, n) \
      DOPRNT_REPS (c, n); \
#define DOPRNT_MEMORY_MAYBE(ptr, len) \
      DOPRNT_MEMORY (ptr, len); \

__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);

#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);

__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
#endif /* _GMP_H_HAVE_VA_LIST */
/* Input callbacks for the formatted-input (doscan) engine: scanf-style
   scan, pointer stepping, get one char, and push one char back. */
typedef int (*gmp_doscan_scan_t)  (void *, const char *, ...);
typedef void *(*gmp_doscan_step_t) (void *, int);
typedef int (*gmp_doscan_get_t)   (void *);
typedef int (*gmp_doscan_unget_t) (int, void *);

/* NOTE(review): the closing "};" of this struct is not visible in this
   excerpt.  */
struct gmp_doscan_funs_t {
  gmp_doscan_scan_t   scan;
  gmp_doscan_step_t   step;
  gmp_doscan_get_t    get;
  gmp_doscan_unget_t  unget;

extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;

#if _GMP_H_HAVE_VA_LIST
__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
/* For testing and debugging. */
/* NOTE(review): the do{...}while(0) wrappers and some interior lines of
   the CHECK_FORMAT macros below are not visible in this excerpt.  */

/* Validate an mpz_t's invariants: normalized high limb and adequate
   allocation. */
#define MPZ_CHECK_FORMAT(z) \
    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \

/* Validate an mpq_t: both parts well-formed, positive denominator,
   canonical zero, and numerator/denominator coprime. */
#define MPQ_CHECK_FORMAT(q) \
    MPZ_CHECK_FORMAT (mpq_numref (q)); \
    MPZ_CHECK_FORMAT (mpq_denref (q)); \
    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
    if (SIZ(mpq_numref(q)) == 0) \
        /* should have zero as 0/1 */ \
        ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
                       && PTR(mpq_denref(q))[0] == 1); \
        /* should have no common factors */ \
        mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
        ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \

/* Validate an mpf_t: sufficient precision, size within precision, and a
   non-zero high limb (zero values are checked via EXP). */
#define MPF_CHECK_FORMAT(f) \
    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
        ASSERT_ALWAYS (EXP(f) == 0); \
        ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \

/* Drop the spare allocation so the next growth forces a realloc. */
#define MPZ_PROVOKE_REALLOC(z) \
  do { ALLOC(z) = ABSIZ(z); } while (0)
/* Enhancement: The "mod" and "gcd_1" functions below could have
   __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
   function pointers, only actual functions.  It probably doesn't make much
   difference to the gmp code, since hopefully we arrange calls so there's
   no great need for the compiler to move things around. */

#if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
   in mpn/x86/x86-defs.m4.  Be sure to update that when changing here. */
/* NOTE(review): the "struct cpuvec_t {" opening line and its closing
   "};" are not visible in this excerpt.  */
  DECL_add_n           ((*add_n));
  DECL_addlsh1_n       ((*addlsh1_n));
  DECL_addlsh2_n       ((*addlsh2_n));
  DECL_addmul_1        ((*addmul_1));
  DECL_addmul_2        ((*addmul_2));
  DECL_bdiv_dbm1c      ((*bdiv_dbm1c));
  DECL_copyd           ((*copyd));
  DECL_copyi           ((*copyi));
  DECL_divexact_1      ((*divexact_1));
  DECL_divrem_1        ((*divrem_1));
  DECL_gcd_1           ((*gcd_1));
  DECL_lshift          ((*lshift));
  DECL_lshiftc         ((*lshiftc));
  DECL_mod_1           ((*mod_1));
  DECL_mod_1_1p        ((*mod_1_1p));
  DECL_mod_1_1p_cps    ((*mod_1_1p_cps));
  DECL_mod_1s_2p       ((*mod_1s_2p));
  DECL_mod_1s_2p_cps   ((*mod_1s_2p_cps));
  DECL_mod_1s_4p       ((*mod_1s_4p));
  DECL_mod_1s_4p_cps   ((*mod_1s_4p_cps));
  DECL_mod_34lsub1     ((*mod_34lsub1));
  DECL_modexact_1c_odd ((*modexact_1c_odd));
  DECL_mul_1           ((*mul_1));
  DECL_mul_basecase    ((*mul_basecase));
  DECL_mullo_basecase  ((*mullo_basecase));
  DECL_preinv_divrem_1 ((*preinv_divrem_1));
  DECL_preinv_mod_1    ((*preinv_mod_1));
  DECL_redc_1          ((*redc_1));
  DECL_redc_2          ((*redc_2));
  DECL_rshift          ((*rshift));
  DECL_sqr_basecase    ((*sqr_basecase));
  DECL_sub_n           ((*sub_n));
  DECL_sublsh1_n       ((*sublsh1_n));
  DECL_submul_1        ((*submul_1));
  /* per-CPU tuning thresholds selected at runtime */
  mp_size_t            mul_toom22_threshold;
  mp_size_t            mul_toom33_threshold;
  mp_size_t            sqr_toom2_threshold;
  mp_size_t            sqr_toom3_threshold;
  mp_size_t            bmod_1_to_mod_1_threshold;
__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
#endif /* x86 fat binary */

__GMP_DECLSPEC void __gmpn_cpuvec_init (void);

/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
   if that hasn't yet been done (to establish the right values). */
#define CPUVEC_THRESHOLD(field) \
  ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
   __gmpn_cpuvec.field)
#if HAVE_NATIVE_mpn_add_nc
#define mpn_add_nc __MPN(add_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
/* NOTE(review): the "#else", return-type, brace and "return co;" lines
   of this generic fallback are not visible in this excerpt.  */
mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
  /* rp[] = up[] + vp[] plus carry-in ci; co accumulates the carry out. */
  co = mpn_add_n (rp, up, vp, n);
  co += mpn_add_1 (rp, rp, n, ci);

#if HAVE_NATIVE_mpn_sub_nc
#define mpn_sub_nc __MPN(sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

static inline mp_limb_t
mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
  /* rp[] = up[] - vp[] minus borrow-in ci; co accumulates the borrow out. */
  co = mpn_sub_n (rp, up, vp, n);
  co += mpn_sub_1 (rp, rp, n, ci);

/* Scan {ap,n} from the most significant limb downwards; presumably
   returns non-zero iff all limbs are zero — body only partially visible,
   TODO confirm against the full source. */
mpn_zero_p (mp_srcptr ap, mp_size_t n)
  for (i = n - 1; i >= 0; i--)
#if TUNE_PROGRAM_BUILD
/* Some extras wanted when recompiling some .c files for use by the tune
   program.  Not part of a normal build.

   It's necessary to keep these thresholds as #defines (just to an
   identically named variable), since various defaults are established based
   on #ifdef in the .c files.  For some this is not so (the defaults are
   instead established above), but all are done this way for consistency. */

#undef  MUL_TOOM22_THRESHOLD
#define MUL_TOOM22_THRESHOLD  mul_toom22_threshold
extern mp_size_t              mul_toom22_threshold;

#undef  MUL_TOOM33_THRESHOLD
#define MUL_TOOM33_THRESHOLD  mul_toom33_threshold
extern mp_size_t              mul_toom33_threshold;

#undef  MUL_TOOM44_THRESHOLD
#define MUL_TOOM44_THRESHOLD  mul_toom44_threshold
extern mp_size_t              mul_toom44_threshold;

#undef  MUL_TOOM6H_THRESHOLD
#define MUL_TOOM6H_THRESHOLD  mul_toom6h_threshold
extern mp_size_t              mul_toom6h_threshold;

#undef  MUL_TOOM8H_THRESHOLD
#define MUL_TOOM8H_THRESHOLD  mul_toom8h_threshold
extern mp_size_t              mul_toom8h_threshold;

#undef  MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD  mul_toom32_to_toom43_threshold
extern mp_size_t                        mul_toom32_to_toom43_threshold;

#undef  MUL_TOOM32_TO_TOOM53_THRESHOLD
#define MUL_TOOM32_TO_TOOM53_THRESHOLD  mul_toom32_to_toom53_threshold
extern mp_size_t                        mul_toom32_to_toom53_threshold;

#undef  MUL_TOOM42_TO_TOOM53_THRESHOLD
#define MUL_TOOM42_TO_TOOM53_THRESHOLD  mul_toom42_to_toom53_threshold
extern mp_size_t                        mul_toom42_to_toom53_threshold;

#undef  MUL_TOOM42_TO_TOOM63_THRESHOLD
#define MUL_TOOM42_TO_TOOM63_THRESHOLD  mul_toom42_to_toom63_threshold
extern mp_size_t                        mul_toom42_to_toom63_threshold;
4693 #undef MUL_TOOM43_TO_TOOM54_THRESHOLD
4694 #define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold;
4695 extern mp_size_t mul_toom43_to_toom54_threshold;
/* NOTE(review): some #else/#endif lines of the conditionals in this
   region are not visible in this excerpt.  */
#undef  MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD  mul_fft_threshold
extern mp_size_t           mul_fft_threshold;

#undef  MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD  mul_fft_modf_threshold
extern mp_size_t                mul_fft_modf_threshold;

#undef  MUL_FFT_TABLE
#define MUL_FFT_TABLE  { 0 }

#undef  MUL_FFT_TABLE3
#define MUL_FFT_TABLE3  { {0,0} }

/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
   remain as zero (always use it). */
#if ! HAVE_NATIVE_mpn_sqr_basecase
#undef  SQR_BASECASE_THRESHOLD
#define SQR_BASECASE_THRESHOLD  sqr_basecase_threshold
extern mp_size_t                sqr_basecase_threshold;

#if TUNE_PROGRAM_BUILD_SQR
#undef  SQR_TOOM2_THRESHOLD
#define SQR_TOOM2_THRESHOLD  SQR_TOOM2_MAX_GENERIC
#undef  SQR_TOOM2_THRESHOLD
#define SQR_TOOM2_THRESHOLD  sqr_toom2_threshold
extern mp_size_t             sqr_toom2_threshold;

#undef  SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD  sqr_toom3_threshold
extern mp_size_t             sqr_toom3_threshold;

#undef  SQR_TOOM4_THRESHOLD
#define SQR_TOOM4_THRESHOLD  sqr_toom4_threshold
extern mp_size_t             sqr_toom4_threshold;

#undef  SQR_TOOM6_THRESHOLD
#define SQR_TOOM6_THRESHOLD  sqr_toom6_threshold
extern mp_size_t             sqr_toom6_threshold;

#undef  SQR_TOOM8_THRESHOLD
#define SQR_TOOM8_THRESHOLD  sqr_toom8_threshold
extern mp_size_t             sqr_toom8_threshold;

#undef  SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD  sqr_fft_threshold
extern mp_size_t           sqr_fft_threshold;

#undef  SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD  sqr_fft_modf_threshold
extern mp_size_t                sqr_fft_modf_threshold;

#undef  SQR_FFT_TABLE
#define SQR_FFT_TABLE  { 0 }

#undef  SQR_FFT_TABLE3
#define SQR_FFT_TABLE3  { {0,0} }

#undef  MULLO_BASECASE_THRESHOLD
#define MULLO_BASECASE_THRESHOLD  mullo_basecase_threshold
extern mp_size_t                  mullo_basecase_threshold;

#undef  MULLO_DC_THRESHOLD
#define MULLO_DC_THRESHOLD  mullo_dc_threshold
extern mp_size_t            mullo_dc_threshold;

#undef  MULLO_MUL_N_THRESHOLD
#define MULLO_MUL_N_THRESHOLD  mullo_mul_n_threshold
extern mp_size_t               mullo_mul_n_threshold;

#undef  MULMID_TOOM42_THRESHOLD
#define MULMID_TOOM42_THRESHOLD  mulmid_toom42_threshold
extern mp_size_t                 mulmid_toom42_threshold;

#undef  DIV_QR_2_PI2_THRESHOLD
#define DIV_QR_2_PI2_THRESHOLD  div_qr_2_pi2_threshold
extern mp_size_t                div_qr_2_pi2_threshold;
#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD  dc_div_qr_threshold
extern mp_size_t             dc_div_qr_threshold;

#undef  DC_DIVAPPR_Q_THRESHOLD
#define DC_DIVAPPR_Q_THRESHOLD  dc_divappr_q_threshold
extern mp_size_t                dc_divappr_q_threshold;

#undef  DC_BDIV_Q_THRESHOLD
#define DC_BDIV_Q_THRESHOLD  dc_bdiv_q_threshold
extern mp_size_t             dc_bdiv_q_threshold;

#undef  DC_BDIV_QR_THRESHOLD
#define DC_BDIV_QR_THRESHOLD  dc_bdiv_qr_threshold
extern mp_size_t              dc_bdiv_qr_threshold;

#undef  MU_DIV_QR_THRESHOLD
#define MU_DIV_QR_THRESHOLD  mu_div_qr_threshold
extern mp_size_t             mu_div_qr_threshold;

#undef  MU_DIVAPPR_Q_THRESHOLD
#define MU_DIVAPPR_Q_THRESHOLD  mu_divappr_q_threshold
extern mp_size_t                mu_divappr_q_threshold;

#undef  MUPI_DIV_QR_THRESHOLD
#define MUPI_DIV_QR_THRESHOLD  mupi_div_qr_threshold
extern mp_size_t               mupi_div_qr_threshold;

#undef  MU_BDIV_QR_THRESHOLD
#define MU_BDIV_QR_THRESHOLD  mu_bdiv_qr_threshold
extern mp_size_t              mu_bdiv_qr_threshold;

#undef  MU_BDIV_Q_THRESHOLD
#define MU_BDIV_Q_THRESHOLD  mu_bdiv_q_threshold
extern mp_size_t             mu_bdiv_q_threshold;

#undef  INV_MULMOD_BNM1_THRESHOLD
#define INV_MULMOD_BNM1_THRESHOLD  inv_mulmod_bnm1_threshold
extern mp_size_t                   inv_mulmod_bnm1_threshold;

#undef  INV_NEWTON_THRESHOLD
#define INV_NEWTON_THRESHOLD  inv_newton_threshold
extern mp_size_t              inv_newton_threshold;

#undef  INV_APPR_THRESHOLD
#define INV_APPR_THRESHOLD  inv_appr_threshold
extern mp_size_t            inv_appr_threshold;

#undef  BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD  binv_newton_threshold
extern mp_size_t               binv_newton_threshold;

#undef  REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD  redc_1_to_redc_2_threshold
extern mp_size_t                    redc_1_to_redc_2_threshold;

#undef  REDC_2_TO_REDC_N_THRESHOLD
#define REDC_2_TO_REDC_N_THRESHOLD  redc_2_to_redc_n_threshold
extern mp_size_t                    redc_2_to_redc_n_threshold;

#undef  REDC_1_TO_REDC_N_THRESHOLD
#define REDC_1_TO_REDC_N_THRESHOLD  redc_1_to_redc_n_threshold
extern mp_size_t                    redc_1_to_redc_n_threshold;

#undef  MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD  matrix22_strassen_threshold
extern mp_size_t                     matrix22_strassen_threshold;

#undef  HGCD_THRESHOLD
#define HGCD_THRESHOLD  hgcd_threshold
extern mp_size_t        hgcd_threshold;

#undef  HGCD_APPR_THRESHOLD
#define HGCD_APPR_THRESHOLD  hgcd_appr_threshold
extern mp_size_t             hgcd_appr_threshold;

#undef  HGCD_REDUCE_THRESHOLD
#define HGCD_REDUCE_THRESHOLD  hgcd_reduce_threshold
extern mp_size_t               hgcd_reduce_threshold;
#undef  GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD  gcd_dc_threshold
extern mp_size_t          gcd_dc_threshold;

#undef  GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD  gcdext_dc_threshold
extern mp_size_t             gcdext_dc_threshold;

#undef  DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  divrem_1_norm_threshold
extern mp_size_t                 divrem_1_norm_threshold;

#undef  DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  divrem_1_unnorm_threshold
extern mp_size_t                   divrem_1_unnorm_threshold;

#undef  MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD  mod_1_norm_threshold
extern mp_size_t              mod_1_norm_threshold;

#undef  MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD  mod_1_unnorm_threshold
extern mp_size_t                mod_1_unnorm_threshold;

/* method selector, not a size threshold, hence "int" */
#undef  MOD_1_1P_METHOD
#define MOD_1_1P_METHOD  mod_1_1p_method
extern int               mod_1_1p_method;

#undef  MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD  mod_1n_to_mod_1_1_threshold
extern mp_size_t                     mod_1n_to_mod_1_1_threshold;

#undef  MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD  mod_1u_to_mod_1_1_threshold
extern mp_size_t                     mod_1u_to_mod_1_1_threshold;

#undef  MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD  mod_1_1_to_mod_1_2_threshold
extern mp_size_t                      mod_1_1_to_mod_1_2_threshold;

#undef  MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD  mod_1_2_to_mod_1_4_threshold
extern mp_size_t                      mod_1_2_to_mod_1_4_threshold;

#undef  PREINV_MOD_1_TO_MOD_1_THRESHOLD
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD  preinv_mod_1_to_mod_1_threshold
extern mp_size_t                         preinv_mod_1_to_mod_1_threshold;

/* NOTE(review): the #endif closing this conditional is not visible in
   this excerpt.  */
#if ! UDIV_PREINV_ALWAYS
#undef  DIVREM_2_THRESHOLD
#define DIVREM_2_THRESHOLD  divrem_2_threshold
extern mp_size_t            divrem_2_threshold;

#undef  MULMOD_BNM1_THRESHOLD
#define MULMOD_BNM1_THRESHOLD  mulmod_bnm1_threshold
extern mp_size_t               mulmod_bnm1_threshold;

#undef  SQRMOD_BNM1_THRESHOLD
#define SQRMOD_BNM1_THRESHOLD  sqrmod_bnm1_threshold
extern mp_size_t               sqrmod_bnm1_threshold;
#undef  GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD  get_str_dc_threshold
extern mp_size_t              get_str_dc_threshold;

#undef  GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD  get_str_precompute_threshold
extern mp_size_t                      get_str_precompute_threshold;

#undef  SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD  set_str_dc_threshold
extern mp_size_t              set_str_dc_threshold;

#undef  SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD  set_str_precompute_threshold
extern mp_size_t                      set_str_precompute_threshold;

#undef  FAC_ODD_THRESHOLD
#define FAC_ODD_THRESHOLD  fac_odd_threshold
extern mp_size_t           fac_odd_threshold;

#undef  FAC_DSC_THRESHOLD
#define FAC_DSC_THRESHOLD  fac_dsc_threshold
extern mp_size_t           fac_dsc_threshold;

/* For the tune build the FFT tables are plain writable globals. */
#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
#define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
extern struct fft_table_nk  mpn_fft_table3[2][FFT_TABLE3_SIZE];

/* Sizes the tune program tests up to, used in a couple of recompilations. */
#undef MUL_TOOM22_THRESHOLD_LIMIT
#undef MUL_TOOM33_THRESHOLD_LIMIT
#undef MULLO_BASECASE_THRESHOLD_LIMIT
#undef SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM2_MAX_GENERIC           200
#define MUL_TOOM22_THRESHOLD_LIMIT      700
#define MUL_TOOM33_THRESHOLD_LIMIT      700
#define SQR_TOOM3_THRESHOLD_LIMIT       400
#define MUL_TOOM44_THRESHOLD_LIMIT     1000
#define SQR_TOOM4_THRESHOLD_LIMIT      1000
#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
#define SQR_TOOM6_THRESHOLD_LIMIT      1100
#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
#define SQR_TOOM8_THRESHOLD_LIMIT      1200
#define MULLO_BASECASE_THRESHOLD_LIMIT  200
#define GET_STR_THRESHOLD_LIMIT         150
#define FAC_DSC_THRESHOLD_LIMIT        2048

#endif /* TUNE_PROGRAM_BUILD */
4971 #if defined (__cplusplus)
4975 /* FIXME: Make these itch functions less conservative. Also consider making
4976 them dependent on just 'an', and compute the allocation directly from 'an'
4977 instead of via n. */
/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
   k is the smallest k such that
     ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
   from which
     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)))).
   GMP_NUMB_BITS serves as a generous upper bound on k below.  */
#define mpn_toom22_mul_itch(an, bn) \
  (2 * ((an) + GMP_NUMB_BITS))
#define mpn_toom2_sqr_itch(an) \
  (2 * ((an) + GMP_NUMB_BITS))
/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.  */
#define mpn_toom33_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom3_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)
/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
   (Comment previously said "toom33/toom3" -- copy-paste; these macros
   size the toom44/toom4 scratch.)  */
#define mpn_toom44_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom4_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)
/* toom6 squaring scratch: grows as 2*n above SQR_TOOM6_THRESHOLD; the MAX
   covers the scratch of the toom4 square performed at the threshold size.  */
#define mpn_toom6_sqr_itch(n) \
  (((n) - SQR_TOOM6_THRESHOLD)*2 + \
   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))

/* Smallest size toom6h is used at: the larger of the toom6h and toom44
   thresholds, so the itch base case below is in the toom44 range.  */
#define MUL_TOOM6H_MIN \
  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ? \
    MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
/* Balanced toom6 multiply scratch, same shape as the squaring estimate.  */
#define mpn_toom6_mul_n_itch(n) \
  (((n) - MUL_TOOM6H_MIN)*2 + \
   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6, \
       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
5020 static inline mp_size_t
5021 mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
5022 mp_size_t estimatedN;
5023 estimatedN = (an + bn) / (size_t) 10 + 1;
5024 return mpn_toom6_mul_n_itch (estimatedN * 6);
/* toom8 squaring scratch: grows as 15n/8 (the (n*15)>>3 terms) above
   SQR_TOOM8_THRESHOLD; the MAX covers the toom6 square done at the
   threshold size.  */
#define mpn_toom8_sqr_itch(n) \
  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))

/* Smallest size toom8h is used at, by analogy with MUL_TOOM6H_MIN.  */
#define MUL_TOOM8H_MIN \
  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ? \
    MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
/* Balanced toom8 multiply scratch, same 15n/8 growth as the square.  */
#define mpn_toom8_mul_n_itch(n) \
  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) + \
   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6, \
       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
5040 static inline mp_size_t
5041 mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
5042 mp_size_t estimatedN;
5043 estimatedN = (an + bn) / (size_t) 14 + 1;
5044 return mpn_toom8_mul_n_itch (estimatedN * 8);
5047 static inline mp_size_t
5048 mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
5050 mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
5051 mp_size_t itch = 2 * n + 1;
/* Scratch need for toom42 multiplication; n = max(ceil(an/4), ceil(bn/2)),
   the toom42 block size.
   NOTE(review): the function's braces and return statement are not
   visible in this view (lines elided); restore the returned formula from
   the canonical source before relying on this definition.  */
static inline mp_size_t
mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
/* Scratch need for toom43 multiplication; n = max(ceil(an/4), ceil(bn/3)),
   the toom43 block size.
   NOTE(review): braces and return statement elided in this view; restore
   from the canonical source.  */
static inline mp_size_t
mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
/* Scratch need for toom52 multiplication; n = max(ceil(an/5), ceil(bn/2)),
   the toom52 block size.
   NOTE(review): braces and return statement elided in this view; restore
   from the canonical source.  */
static inline mp_size_t
mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
/* Scratch need for toom53 multiplication; n = max(ceil(an/5), ceil(bn/3)),
   the toom53 block size.
   NOTE(review): braces and return statement elided in this view; restore
   from the canonical source.  */
static inline mp_size_t
mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
/* Scratch need for toom62 multiplication; n = max(ceil(an/6), ceil(bn/2)),
   the toom62 block size.
   NOTE(review): braces and return statement elided in this view; restore
   from the canonical source.  */
static inline mp_size_t
mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
/* Scratch need for toom63 multiplication; n = max(ceil(an/6), ceil(bn/3)),
   the toom63 block size.
   NOTE(review): braces and return statement elided in this view; restore
   from the canonical source.  */
static inline mp_size_t
mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
/* Scratch need for toom54 multiplication; n = max(ceil(an/5), ceil(bn/4)),
   the toom54 block size.
   NOTE(review): braces and return statement elided in this view; restore
   from the canonical source.  */
static inline mp_size_t
mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
/* Scratch for the toom42 middle product:
   let S(n) = space required for input size n,
   then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).
   3n + GMP_NUMB_BITS is used as a simple upper bound on that recurrence.  */
#define mpn_toom42_mulmid_itch(n) \
  (3 * (n) + GMP_NUMB_BITS)
/* NOTE(review): two conflicting definitions of mpn_fft_mul appear here;
   the preprocessor conditional selecting between them (presumably
   #if WANT_FFT / #else / #endif) is not visible in this view -- restore
   it from the canonical source, as written the second #define would
   trigger a redefinition warning and win.  */
#define mpn_fft_mul mpn_mul_fft_full
#define mpn_fft_mul mpn_nussbaumer_mul
/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown.
   The len field is needed by the destructor, and can be used by anyone else
   to avoid a second strlen pass over the data.

   Since our input is a C string, using strlen is correct.  Perhaps it'd be
   more C++-ish style to use std::char_traits<char>::length, but char_traits
   isn't available in gcc 2.95.4.  */

/* NOTE(review): the class body appears truncated in this view -- the
   access specifier, the str/len member declarations (len is referenced
   by the comment above) and the function-body braces are not visible.
   Restore them from the canonical source.  */
class gmp_allocated_string {
  /* Takes ownership of arg (a __gmp_allocate_func string) and caches its
     length.  NOTE(review): the statement storing arg into the str member
     is among the elided lines.  */
  gmp_allocated_string(char *arg)
    len = std::strlen (str);
  /* Frees the string through the user-installed free function; len+1
     accounts for the terminating NUL.  */
  ~gmp_allocated_string()
    (*__gmp_free_func) (str, len+1);
/* Internal helpers for the C++ iostream operators: stream input for mpz
   without leading-whitespace skipping, base/digit handling for istream
   extraction, and doprnt-based formatted output for ostream insertion.  */
std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
5150 #endif /* __cplusplus */
5152 #endif /* __GMP_IMPL_H__ */