/* Reference mpn functions, designed to be simple, portable and independent
   of the normal gmp code.  Speed isn't a consideration.

Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/. */

/* Most routines have assertions representing what the mpn routines are
   supposed to accept.  Many of these reference routines do sensible things
   outside these ranges (eg. for size==0), but the assertions are present to
   pick up bad parameters passed here that are about to be passed the same
   to a real mpn routine being compared.  */

/* always do assertion checking */
#define WANT_ASSERT  1

#include <stdio.h>  /* for NULL */
#include <stdlib.h> /* for malloc */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#include "tests.h"

/* Return non-zero if regions {xp,xsize} and {yp,ysize} overlap, with sizes
   in bytes.  */
static int
byte_overlap_p (const void *v_xp, mp_size_t xsize,
                const void *v_yp, mp_size_t ysize)
{
  const char *xp = v_xp;
  const char *yp = v_yp;

  /* no wraparounds */
  ASSERT (xp+xsize >= xp);
  ASSERT (yp+ysize >= yp);

  if (xp + xsize <= yp)
    return 0;
  if (yp + ysize <= xp)
    return 0;
  return 1;
}

/* Return non-zero if limb regions {xp,xsize} and {yp,ysize} overlap. */
int
refmpn_overlap_p (mp_srcptr xp, mp_size_t xsize, mp_srcptr yp, mp_size_t ysize)
{
  return byte_overlap_p (xp, xsize * BYTES_PER_MP_LIMB,
                         yp, ysize * BYTES_PER_MP_LIMB);
}

/* Check overlap for a routine defined to work low to high. */
int
refmpn_overlap_low_to_high_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst <= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a routine defined to work high to low. */
int
refmpn_overlap_high_to_low_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst >= src || ! refmpn_overlap_p (dst, size, src, size));
}

/* Check overlap for a standard routine requiring equal or separate. */
int
refmpn_overlap_fullonly_p (mp_srcptr dst, mp_srcptr src, mp_size_t size)
{
  return (dst == src || ! refmpn_overlap_p (dst, size, src, size));
}
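
/* Example (illustrative, not from the original source): an in-place call
   with dst == src passes the "fullonly" test, as does a completely separate
   destination, but a partially overlapping dst == src+1 with size >= 2 does
   not, since a routine walking the operands would read limbs it has already
   written.  */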

int
refmpn_overlap_fullonly_two_p (mp_srcptr dst, mp_srcptr src1, mp_srcptr src2,
                               mp_size_t size)
{
  return (refmpn_overlap_fullonly_p (dst, src1, size)
          && refmpn_overlap_fullonly_p (dst, src2, size));
}

mp_ptr
refmpn_malloc_limbs (mp_size_t size)
{
  mp_ptr  p;
  ASSERT (size >= 0);
  if (size == 0)
    size = 1;
  p = (mp_ptr) malloc ((size_t) (size * BYTES_PER_MP_LIMB));
  ASSERT (p != NULL);
  return p;
}

/* Free limbs allocated by refmpn_malloc_limbs.  NOTE: Can't free
 * memory allocated by refmpn_malloc_limbs_aligned. */
void
refmpn_free_limbs (mp_ptr p)
{
  free (p);
}

mp_ptr
refmpn_memdup_limbs (mp_srcptr ptr, mp_size_t size)
{
  mp_ptr  p;
  p = refmpn_malloc_limbs (size);
  refmpn_copyi (p, ptr, size);
  return p;
}

/* malloc n limbs on a multiple-of-m-bytes boundary */
mp_ptr
refmpn_malloc_limbs_aligned (mp_size_t n, size_t m)
{
  return (mp_ptr) align_pointer (refmpn_malloc_limbs (n + m-1), m);
}

void
refmpn_fill (mp_ptr ptr, mp_size_t size, mp_limb_t value)
{
  mp_size_t  i;

  ASSERT (size >= 0);
  for (i = 0; i < size; i++)
    ptr[i] = value;
}

void
refmpn_zero (mp_ptr ptr, mp_size_t size)
{
  refmpn_fill (ptr, size, CNST_LIMB(0));
}

void
refmpn_zero_extend (mp_ptr ptr, mp_size_t oldsize, mp_size_t newsize)
{
  ASSERT (newsize >= oldsize);
  refmpn_zero (ptr+oldsize, newsize-oldsize);
}

int
refmpn_zero_p (mp_srcptr ptr, mp_size_t size)
{
  mp_size_t  i;

  for (i = 0; i < size; i++)
    if (ptr[i] != 0)
      return 0;
  return 1;
}

mp_size_t
refmpn_normalize (mp_srcptr ptr, mp_size_t size)
{
  ASSERT (size >= 0);
  while (size > 0 && ptr[size-1] == 0)
    size--;
  return size;
}

/* the highest one bit in x */
mp_limb_t
refmpn_msbone (mp_limb_t x)
{
  mp_limb_t  n = (mp_limb_t) 1 << (GMP_LIMB_BITS-1);

  while (n != 0)
    {
      if (x & n)
        break;
      n >>= 1;
    }
  return n;
}

/* a mask of the highest one bit and all bits below */
mp_limb_t
refmpn_msbone_mask (mp_limb_t x)
{
  if (x == 0)
    return 0;

  return (refmpn_msbone (x) << 1) - 1;
}
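
/* Worked example (illustrative): x = 0x2C is binary 101100, so
   refmpn_msbone (x) == 0x20 and refmpn_msbone_mask (x) == 0x3F,
   the top set bit and then that bit with all bits below filled in.  */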

/* How many digits in the given base will fit in a limb.
   Notice that the product b is allowed to be equal to the limit
   2^GMP_NUMB_BITS, this ensures the result for base==2 will be
   GMP_NUMB_BITS (and similarly other powers of 2).  */
int
refmpn_chars_per_limb (int base)
{
  mp_limb_t  limit[2], b[2];
  int        chars_per_limb;

  ASSERT (base >= 2);

  limit[0] = 0;  /* limit = 2^GMP_NUMB_BITS */
  limit[1] = 1;
  b[0] = 1;      /* b = 1 */
  b[1] = 0;

  chars_per_limb = 0;
  for (;;)
    {
      if (refmpn_mul_1 (b, b, (mp_size_t) 2, (mp_limb_t) base))
        break;
      if (refmpn_cmp (b, limit, (mp_size_t) 2) > 0)
        break;
      chars_per_limb++;
    }
  return chars_per_limb;
}

/* The biggest value base**n which fits in GMP_NUMB_BITS bits. */
mp_limb_t
refmpn_big_base (int base)
{
  int        chars_per_limb = refmpn_chars_per_limb (base);
  int        i;
  mp_limb_t  bb;

  ASSERT (base >= 2);
  bb = 1;
  for (i = 0; i < chars_per_limb; i++)
    bb *= base;
  return bb;
}
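
/* Worked example (illustrative, assuming GMP_NUMB_BITS == 32, no nails):
   nine decimal digits fit in a limb since 10^9 < 2^32 while 10^10 > 2^32,
   so refmpn_chars_per_limb (10) == 9 and refmpn_big_base (10) == 10^9 ==
   1000000000.  For base 2 the product may equal 2^GMP_NUMB_BITS exactly,
   giving refmpn_chars_per_limb (2) == 32.  */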

void
refmpn_setbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] |= CNST_LIMB(1) << (bit%GMP_NUMB_BITS);
}

void
refmpn_clrbit (mp_ptr ptr, unsigned long bit)
{
  ptr[bit/GMP_NUMB_BITS] &= ~ (CNST_LIMB(1) << (bit%GMP_NUMB_BITS));
}

#define REFMPN_TSTBIT(ptr,bit) \
  (((ptr)[(bit)/GMP_NUMB_BITS] & (CNST_LIMB(1) << ((bit)%GMP_NUMB_BITS))) != 0)

int
refmpn_tstbit (mp_srcptr ptr, unsigned long bit)
{
  return REFMPN_TSTBIT (ptr, bit);
}

unsigned long
refmpn_scan0 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) != 0)
    bit++;
  return bit;
}

unsigned long
refmpn_scan1 (mp_srcptr ptr, unsigned long bit)
{
  while (REFMPN_TSTBIT (ptr, bit) == 0)
    bit++;
  return bit;
}

void
refmpn_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  refmpn_copyi (rp, sp, size);
}

void
refmpn_copyi (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    rp[i] = sp[i];
}

void
refmpn_copyd (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 0);

  for (i = size-1; i >= 0; i--)
    rp[i] = sp[i];
}

/* Copy {xp,xsize} to {wp,wsize}.  If x is shorter, then pad w with low
   zeros to wsize.  If x is longer, then copy just the high wsize limbs. */
void
refmpn_copy_extend (mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize)
{
  ASSERT (wsize >= 0);
  ASSERT (xsize >= 0);

  /* high part of x if x bigger than w */
  if (xsize > wsize)
    {
      xp += xsize - wsize;
      xsize = wsize;
    }

  refmpn_copy (wp + wsize-xsize, xp, xsize);
  refmpn_zero (wp, wsize-xsize);
}

int
refmpn_cmp (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (size >= 1);
  ASSERT_MPN (xp, size);
  ASSERT_MPN (yp, size);

  for (i = size-1; i >= 0; i--)
    {
      if (xp[i] > yp[i])  return 1;
      if (xp[i] < yp[i])  return -1;
    }
  return 0;
}

int
refmpn_cmp_allowzero (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  if (size == 0)
    return 0;
  else
    return refmpn_cmp (xp, yp, size);
}

int
refmpn_cmp_twosizes (mp_srcptr xp, mp_size_t xsize,
                     mp_srcptr yp, mp_size_t ysize)
{
  int  opp, cmp;

  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  opp = (xsize < ysize);
  if (opp)
    MPN_SRCPTR_SWAP (xp,xsize, yp,ysize);

  if (! refmpn_zero_p (xp+ysize, xsize-ysize))
    cmp = 1;
  else
    cmp = refmpn_cmp (xp, yp, ysize);

  return (opp ? -cmp : cmp);
}

int
refmpn_equal_anynail (mp_srcptr xp, mp_srcptr yp, mp_size_t size)
{
  mp_size_t  i;

  ASSERT (size >= 0);

  for (i = 0; i < size; i++)
    if (xp[i] != yp[i])
      return 0;
  return 1;
}

#define LOGOPS(operation)                                        \
  {                                                              \
    mp_size_t  i;                                                \
                                                                 \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (size >= 1);                                          \
    ASSERT_MPN (s1p, size);                                      \
    ASSERT_MPN (s2p, size);                                      \
                                                                 \
    for (i = 0; i < size; i++)                                   \
      rp[i] = operation;                                         \
  }

void
refmpn_and_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & s2p[i]);
}

void
refmpn_andn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] & ~s2p[i]);
}

void
refmpn_nand_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] & s2p[i]) ^ GMP_NUMB_MASK);
}

void
refmpn_ior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | s2p[i]);
}

void
refmpn_iorn_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] | (s2p[i] ^ GMP_NUMB_MASK));
}

void
refmpn_nior_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] | s2p[i]) ^ GMP_NUMB_MASK);
}

void
refmpn_xor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS (s1p[i] ^ s2p[i]);
}

void
refmpn_xnor_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  LOGOPS ((s1p[i] ^ s2p[i]) ^ GMP_NUMB_MASK);
}

/* set *dh,*dl to mh:ml - sh:sl, in full limbs */
void
refmpn_sub_ddmmss (mp_limb_t *dh, mp_limb_t *dl,
                   mp_limb_t mh, mp_limb_t ml, mp_limb_t sh, mp_limb_t sl)
{
  *dh = mh - sh - (ml < sl);
  *dl = ml - sl;
}
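
/* Worked example (illustrative, 16-bit limbs for brevity): subtracting
   sh:sl = 0x0001:0x0007 from mh:ml = 0x0005:0x0002 borrows from the high
   limb since 0x0002 < 0x0007, giving *dh = 0x0003 and *dl = 0xFFFB,
   i.e. 0x00050002 - 0x00010007 == 0x0003FFFB.  */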

/* set *w to x+y, return 0 or 1 carry */
mp_limb_t
ref_addc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  sum, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  sum = x + y;
#if GMP_NAIL_BITS == 0
  *w = sum;
  cy = (sum < x);
#else
  *w = sum & GMP_NUMB_MASK;
  cy = (sum >> GMP_NUMB_BITS);
#endif
  return cy;
}

/* set *w to x-y, return 0 or 1 borrow */
mp_limb_t
ref_subc_limb (mp_limb_t *w, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  diff, cy;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);

  diff = x - y;
#if GMP_NAIL_BITS == 0
  *w = diff;
  cy = (x < y);
#else
  *w = diff & GMP_NUMB_MASK;
  cy = (diff >> GMP_NUMB_BITS) & 1;
#endif
  return cy;
}

/* set *w to x+y+c (where c is 0 or 1), return 0 or 1 carry */
mp_limb_t
adc (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t  r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = ref_addc_limb (w, x, y);
  return r + ref_addc_limb (w, *w, c);
}

/* set *w to x-y-c (where c is 0 or 1), return 0 or 1 borrow */
mp_limb_t
sbb (mp_limb_t *w, mp_limb_t x, mp_limb_t y, mp_limb_t c)
{
  mp_limb_t  r;

  ASSERT_LIMB (x);
  ASSERT_LIMB (y);
  ASSERT (c == 0 || c == 1);

  r = ref_subc_limb (w, x, y);
  return r + ref_subc_limb (w, *w, c);
}

#define AORS_1(operation)                               \
  {                                                     \
    mp_size_t  i;                                       \
                                                        \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));  \
    ASSERT (size >= 1);                                 \
    ASSERT_MPN (sp, size);                              \
    ASSERT_LIMB (n);                                    \
                                                        \
    for (i = 0; i < size; i++)                          \
      n = operation (&rp[i], sp[i], n);                 \
    return n;                                           \
  }

mp_limb_t
refmpn_add_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_addc_limb);
}

mp_limb_t
refmpn_sub_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t n)
{
  AORS_1 (ref_subc_limb);
}

#define AORS_NC(operation)                                       \
  {                                                              \
    mp_size_t  i;                                                \
                                                                 \
    ASSERT (refmpn_overlap_fullonly_two_p (rp, s1p, s2p, size)); \
    ASSERT (carry == 0 || carry == 1);                           \
    ASSERT (size >= 1);                                          \
    ASSERT_MPN (s1p, size);                                      \
    ASSERT_MPN (s2p, size);                                      \
                                                                 \
    for (i = 0; i < size; i++)                                   \
      carry = operation (&rp[i], s1p[i], s2p[i], carry);         \
    return carry;                                                \
  }

mp_limb_t
refmpn_add_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (adc);
}

mp_limb_t
refmpn_sub_nc (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
               mp_limb_t carry)
{
  AORS_NC (sbb);
}

mp_limb_t
refmpn_add_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}

mp_limb_t
refmpn_sub_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}

mp_limb_t
refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                 mp_size_t n, unsigned int s)
{
  mp_limb_t  cy;
  mp_ptr     tp;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT (0 < s && s < GMP_NUMB_BITS);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  tp = refmpn_malloc_limbs (n);
  cy  = refmpn_lshift (tp, vp, n, s);
  cy += refmpn_add_n (rp, up, tp, n);
  free (tp);
  return cy;
}

mp_limb_t
refmpn_addlsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, up, vp, n, 1);
}

mp_limb_t
refmpn_addlsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_addlsh_n (rp, up, vp, n, 2);
}

mp_limb_t
refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                 mp_size_t n, unsigned int s)
{
  mp_limb_t  cy;
  mp_ptr     tp;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT (0 < s && s < GMP_NUMB_BITS);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  tp = refmpn_malloc_limbs (n);
  cy  = mpn_lshift (tp, vp, n, s);
  cy += mpn_sub_n (rp, up, tp, n);
  free (tp);
  return cy;
}

mp_limb_t
refmpn_sublsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_sublsh_n (rp, up, vp, n, 1);
}

mp_limb_t
refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
                 mp_size_t n, unsigned int s)
{
  mp_limb_t  cy;
  mp_ptr     tp;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT (0 < s && s < GMP_NUMB_BITS);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  tp = refmpn_malloc_limbs (n);
  cy  = mpn_lshift (tp, vp, n, s);
  cy -= mpn_sub_n (rp, tp, up, n);
  free (tp);
  return cy;
}

mp_limb_t
refmpn_rsblsh1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_rsblsh_n (rp, up, vp, n, 1);
}

mp_limb_t
refmpn_rsblsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  return refmpn_rsblsh_n (rp, up, vp, n, 2);
}

mp_limb_t
refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t  cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_add_n (rp, up, vp, n);
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);

  return cys;
}

mp_limb_t
refmpn_rsh1sub_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t  cya, cys;

  ASSERT (refmpn_overlap_fullonly_two_p (rp, up, vp, n));
  ASSERT (n >= 1);
  ASSERT_MPN (up, n);
  ASSERT_MPN (vp, n);

  cya = mpn_sub_n (rp, up, vp, n);
  cys = mpn_rshift (rp, rp, n, 1) >> (GMP_NUMB_BITS - 1);
  rp[n - 1] |= cya << (GMP_NUMB_BITS - 1);

  return cys;
}
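
/* Illustrative behaviour (not from the original source): rsh1add computes
   floor((u+v)/2) into rp, re-inserting the carry of u+v as the top bit,
   and returns the low bit of u+v that was shifted out.  E.g. with n == 1
   and no nails, u = 3, v = 2 gives rp[0] = 2 and return value 1, since
   u + v == 5 == 2*2 + 1.  */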

/* Twos complement, return borrow. */
mp_limb_t
refmpn_neg (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  mp_ptr     zeros;
  mp_limb_t  ret;

  ASSERT (size >= 1);

  zeros = refmpn_malloc_limbs (size);
  refmpn_fill (zeros, size, CNST_LIMB(0));
  ret = refmpn_sub_n (dst, zeros, src, size);
  free (zeros);
  return ret;
}

#define AORS(aors_n, aors_1)                                  \
  {                                                           \
    mp_limb_t  c;                                             \
                                                              \
    ASSERT (s1size >= s2size);                                \
    ASSERT (s2size >= 1);                                     \
                                                              \
    c = aors_n (rp, s1p, s2p, s2size);                        \
    if (s1size-s2size != 0)                                   \
      c = aors_1 (rp+s2size, s1p+s2size, s1size-s2size, c);   \
    return c;                                                 \
  }

mp_limb_t
refmpn_add (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_add_n, refmpn_add_1);
}

mp_limb_t
refmpn_sub (mp_ptr rp,
            mp_srcptr s1p, mp_size_t s1size,
            mp_srcptr s2p, mp_size_t s2size)
{
  AORS (refmpn_sub_n, refmpn_sub_1);
}

#define SHIFTHIGH(x) ((x) << GMP_LIMB_BITS/2)
#define SHIFTLOW(x)  ((x) >> GMP_LIMB_BITS/2)

#define LOWMASK   (((mp_limb_t) 1 << GMP_LIMB_BITS/2)-1)
#define HIGHMASK  SHIFTHIGH(LOWMASK)

#define LOWPART(x)   ((x) & LOWMASK)
#define HIGHPART(x)  SHIFTLOW((x) & HIGHMASK)

/* Set *lo to the low limb of x*y and return the high limb, using full
   limbs not nails. */
mp_limb_t
refmpn_umul_ppmm (mp_limb_t *lo, mp_limb_t x, mp_limb_t y)
{
  mp_limb_t  hi, s;

  *lo = LOWPART(x) * LOWPART(y);
  hi = HIGHPART(x) * HIGHPART(y);

  s = LOWPART(x) * HIGHPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  s = HIGHPART(x) * LOWPART(y);
  hi += HIGHPART(s);
  s = SHIFTHIGH(LOWPART(s));
  *lo += s;
  hi += (*lo < s);

  return hi;
}
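
/* The identity behind the half-limb schoolbook method above, with
   B = GMP_LIMB_BITS/2, xh = HIGHPART(x), xl = LOWPART(x), and likewise
   yh, yl:

     x*y = (xh*yh << 2B) + (xh*yl << B) + (xl*yh << B) + xl*yl

   Each partial product fits in one limb, and the carries out of *lo are
   propagated into the high limb by the (*lo < s) tests.  */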

mp_limb_t
refmpn_umul_ppmm_r (mp_limb_t x, mp_limb_t y, mp_limb_t *lo)
{
  return refmpn_umul_ppmm (lo, x, y);
}

mp_limb_t
refmpn_mul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier,
               mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  hi, lo;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (multiplier);
  ASSERT_LIMB (carry);

  multiplier <<= GMP_NAIL_BITS;
  for (i = 0; i < size; i++)
    {
      hi = refmpn_umul_ppmm (&lo, sp[i], multiplier);
      lo >>= GMP_NAIL_BITS;
      ASSERT_NOCARRY (ref_addc_limb (&hi, hi, ref_addc_limb (&lo, lo, carry)));
      rp[i] = lo;
      carry = hi;
    }
  return carry;
}

mp_limb_t
refmpn_mul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_mul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}

static mp_limb_t
refmpn_mul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
              mp_srcptr mult, mp_size_t msize)
{
  mp_ptr     src_copy;
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_fullonly_p (dst, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  ASSERT (size >= msize);
  ASSERT_MPN (mult, msize);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  dst[size] = refmpn_mul_1 (dst, src, size, mult[0]);
  for (i = 1; i < msize-1; i++)
    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);

  free (src_copy);
  return ret;
}

mp_limb_t
refmpn_mul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 2);
}

mp_limb_t
refmpn_mul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 3);
}

mp_limb_t
refmpn_mul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
}

#define AORSMUL_1C(operation_n)                             \
  {                                                         \
    mp_ptr     p;                                           \
    mp_limb_t  ret;                                         \
                                                            \
    ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));      \
                                                            \
    p = refmpn_malloc_limbs (size);                         \
    ret = refmpn_mul_1c (p, sp, size, multiplier, carry);   \
    ret += operation_n (rp, rp, p, size);                   \
    free (p);                                               \
    return ret;                                             \
  }

mp_limb_t
refmpn_addmul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_add_n);
}

mp_limb_t
refmpn_submul_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t multiplier, mp_limb_t carry)
{
  AORSMUL_1C (refmpn_sub_n);
}

mp_limb_t
refmpn_addmul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_addmul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}

mp_limb_t
refmpn_submul_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t multiplier)
{
  return refmpn_submul_1c (rp, sp, size, multiplier, CNST_LIMB(0));
}

static mp_limb_t
refmpn_addmul_N (mp_ptr dst, mp_srcptr src, mp_size_t size,
                 mp_srcptr mult, mp_size_t msize)
{
  mp_ptr     src_copy;
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (dst == src || ! refmpn_overlap_p (dst, size+msize-1, src, size));
  ASSERT (! refmpn_overlap_p (dst, size+msize-1, mult, msize));
  ASSERT (size >= msize);
  ASSERT_MPN (mult, msize);

  /* in case dst==src */
  src_copy = refmpn_malloc_limbs (size);
  refmpn_copyi (src_copy, src, size);
  src = src_copy;

  for (i = 0; i < msize-1; i++)
    dst[size+i] = refmpn_addmul_1 (dst+i, src, size, mult[i]);
  ret = refmpn_addmul_1 (dst+i, src, size, mult[i]);

  free (src_copy);
  return ret;
}

mp_limb_t
refmpn_addmul_2 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 2);
}

mp_limb_t
refmpn_addmul_3 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 3);
}

mp_limb_t
refmpn_addmul_4 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 4);
}

mp_limb_t
refmpn_addmul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 5);
}

mp_limb_t
refmpn_addmul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 6);
}

mp_limb_t
refmpn_addmul_7 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 7);
}

mp_limb_t
refmpn_addmul_8 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
{
  return refmpn_addmul_N (rp, sp, size, mult, (mp_size_t) 8);
}

mp_limb_t
refmpn_add_n_sub_nc (mp_ptr r1p, mp_ptr r2p,
                     mp_srcptr s1p, mp_srcptr s2p, mp_size_t size,
                     mp_limb_t carry)
{
  mp_ptr     p;
  mp_limb_t  acy, scy;

  /* Destinations can't overlap. */
  ASSERT (! refmpn_overlap_p (r1p, size, r2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r1p, s1p, s2p, size));
  ASSERT (refmpn_overlap_fullonly_two_p (r2p, s1p, s2p, size));
  ASSERT (size >= 1);

  /* in case r1p==s1p or r1p==s2p */
  p = refmpn_malloc_limbs (size);

  acy = refmpn_add_nc (p, s1p, s2p, size, carry >> 1);
  scy = refmpn_sub_nc (r2p, s1p, s2p, size, carry & 1);
  refmpn_copyi (r1p, p, size);

  free (p);
  return 2 * acy + scy;
}
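
/* Note on the carry packing (illustrative): bit 1 of the incoming carry
   feeds the addition and bit 0 feeds the subtraction, and the return
   value packs the outgoing carries the same way, as 2*acy + scy.  */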

mp_limb_t
refmpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p,
                    mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  return refmpn_add_n_sub_nc (r1p, r2p, s1p, s2p, size, CNST_LIMB(0));
}

/* Right shift hi,lo and return the low limb of the result.
   Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
mp_limb_t
rshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return lo;
  else
    return ((hi << (GMP_NUMB_BITS-shift)) | (lo >> shift)) & GMP_NUMB_MASK;
}

/* Left shift hi,lo and return the high limb of the result.
   Note a shift by GMP_LIMB_BITS isn't assumed to work (doesn't on x86). */
mp_limb_t
lshift_make (mp_limb_t hi, mp_limb_t lo, unsigned shift)
{
  ASSERT (shift < GMP_NUMB_BITS);
  if (shift == 0)
    return hi;
  else
    return ((hi << shift) | (lo >> (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
}

mp_limb_t
refmpn_rshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_low_to_high_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = rshift_make (sp[0], CNST_LIMB(0), shift);

  for (i = 0; i < size-1; i++)
    rp[i] = rshift_make (sp[i+1], sp[i], shift);

  rp[i] = rshift_make (CNST_LIMB(0), sp[i], shift);
  return ret;
}

mp_limb_t
refmpn_lshift (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  ret;
  mp_size_t  i;

  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (shift >= 1 && shift < GMP_NUMB_BITS);
  ASSERT_MPN (sp, size);

  ret = lshift_make (CNST_LIMB(0), sp[size-1], shift);

  for (i = size-2; i >= 0; i--)
    rp[i+1] = lshift_make (sp[i+1], sp[i], shift);

  rp[i+1] = lshift_make (sp[i+1], CNST_LIMB(0), shift);
  return ret;
}

void
refmpn_com (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  mp_size_t  i;

  /* We work downwards since mpn_lshiftc needs that. */
  ASSERT (refmpn_overlap_high_to_low_p (rp, sp, size));

  for (i = size - 1; i >= 0; i--)
    rp[i] = (~sp[i]) & GMP_NUMB_MASK;
}

mp_limb_t
refmpn_lshiftc (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  mp_limb_t  res;

  /* No asserts here, refmpn_lshift will assert what we need. */

  res = refmpn_lshift (rp, sp, size, shift);
  refmpn_com (rp, rp, size);
  return res;
}

/* accepting shift==0 and doing a plain copyi or copyd in that case */
mp_limb_t
refmpn_rshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyi (rp, sp, size);
      return 0;
    }
  else
    return refmpn_rshift (rp, sp, size, shift);
}

mp_limb_t
refmpn_lshift_or_copy (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned shift)
{
  if (shift == 0)
    {
      refmpn_copyd (rp, sp, size);
      return 0;
    }
  else
    return refmpn_lshift (rp, sp, size, shift);
}

/* accepting size==0 too */
mp_limb_t
refmpn_rshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                           unsigned shift)
{
  return (size == 0 ? 0 : refmpn_rshift_or_copy (rp, sp, size, shift));
}

mp_limb_t
refmpn_lshift_or_copy_any (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                           unsigned shift)
{
  return (size == 0 ? 0 : refmpn_lshift_or_copy (rp, sp, size, shift));
}

/* Divide h,l by d, return quotient, store remainder to *rp.
   Operates on full limbs, not nails.
   Must have h < d.
   __udiv_qrnnd_c isn't simple, and it's a bit slow, but it works. */
mp_limb_t
refmpn_udiv_qrnnd (mp_limb_t *rp, mp_limb_t h, mp_limb_t l, mp_limb_t d)
{
  mp_limb_t  q, r;
  int  n;

  ASSERT (d != 0);
  ASSERT (h < d);

#if 0
  udiv_qrnnd (q, r, h, l, d);
  *rp = r;
  return q;
#endif

  n = refmpn_count_leading_zeros (d);
  d <<= n;

  if (n != 0)
    {
      h = (h << n) | (l >> (GMP_LIMB_BITS - n));
      l <<= n;
    }

  __udiv_qrnnd_c (q, r, h, l, d);
  r >>= n;
  *rp = r;
  return q;
}

mp_limb_t
refmpn_udiv_qrnnd_r (mp_limb_t h, mp_limb_t l, mp_limb_t d, mp_limb_t *rp)
{
  return refmpn_udiv_qrnnd (rp, h, l, d);
}
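
/* Worked example (illustrative, assuming GMP_LIMB_BITS == 32): h = 1,
   l = 0, d = 2 divides 2^32 by 2, giving quotient 0x80000000 (2^31) and
   remainder 0; the h < d precondition holds since 1 < 2.  */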

/* This little subroutine avoids some bad code generation from i386 gcc 3.0
   -fPIC -O2 -fomit-frame-pointer (%ebp being used uninitialized).  */
static mp_limb_t
refmpn_divmod_1c_workaround (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                             mp_limb_t divisor, mp_limb_t carry)
{
  mp_size_t  i;
  mp_limb_t  rem[1];

  for (i = size-1; i >= 0; i--)
    {
      rp[i] = refmpn_udiv_qrnnd (rem, carry,
                                 sp[i] << GMP_NAIL_BITS,
                                 divisor << GMP_NAIL_BITS);
      carry = *rem >> GMP_NAIL_BITS;
    }
  return carry;
}

mp_limb_t
refmpn_divmod_1c (mp_ptr rp, mp_srcptr sp, mp_size_t size,
                  mp_limb_t divisor, mp_limb_t carry)
{
  mp_ptr     sp_orig;
  mp_ptr     prod;
  mp_limb_t  carry_orig;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 0);
  ASSERT (carry < divisor);
  ASSERT_MPN (sp, size);
  ASSERT_LIMB (divisor);
  ASSERT_LIMB (carry);

  if (size == 0)
    return carry;

  sp_orig = refmpn_memdup_limbs (sp, size);
  prod = refmpn_malloc_limbs (size);
  carry_orig = carry;

  carry = refmpn_divmod_1c_workaround (rp, sp, size, divisor, carry);

  /* check by multiplying back */
#if 0
  printf ("size=%ld divisor=0x%lX carry=0x%lX remainder=0x%lX\n",
          size, divisor, carry_orig, carry);
  mpn_trace("s",sp_orig,size);
  mpn_trace("r",rp,size);
  printf ("mul_1c %lX\n", refmpn_mul_1c (prod, rp, size, divisor, carry));
  mpn_trace("p",prod,size);
#endif
  ASSERT (refmpn_mul_1c (prod, rp, size, divisor, carry) == carry_orig);
  ASSERT (refmpn_cmp (prod, sp_orig, size) == 0);

  free (sp_orig);
  free (prod);

  return carry;
}

mp_limb_t
refmpn_divmod_1 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divmod_1c (rp, sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_mod_1c (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
               mp_limb_t carry)
{
  mp_ptr  p = refmpn_malloc_limbs (size);
  carry = refmpn_divmod_1c (p, sp, size, divisor, carry);
  free (p);
  return carry;
}

mp_limb_t
refmpn_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_mod_1c (sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_mod_1 (mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                     mp_limb_t inverse)
{
  ASSERT (divisor & GMP_NUMB_HIGHBIT);
  ASSERT (inverse == refmpn_invert_limb (divisor));
  return refmpn_mod_1 (sp, size, divisor);
}

/* This implementation will be rather slow, but has the advantage of being
   in a different style than the libgmp versions.  */
mp_limb_t
refmpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
{
  ASSERT ((GMP_NUMB_BITS % 4) == 0);
  return mpn_mod_1 (p, n, (CNST_LIMB(1) << (3 * GMP_NUMB_BITS / 4)) - 1);
}
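
/* Example of the modulus used (illustrative): with GMP_NUMB_BITS == 32
   the divisor is 2^24 - 1 == 0xFFFFFF, and with GMP_NUMB_BITS == 64 it
   is 2^48 - 1, i.e. a limb with its low three quarters all ones.  */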

mp_limb_t
refmpn_divrem_1c (mp_ptr rp, mp_size_t xsize,
                  mp_srcptr sp, mp_size_t size, mp_limb_t divisor,
                  mp_limb_t carry)
{
  mp_ptr  z;

  z = refmpn_malloc_limbs (xsize);
  refmpn_fill (z, xsize, CNST_LIMB(0));

  carry = refmpn_divmod_1c (rp+xsize, sp, size, divisor, carry);
  carry = refmpn_divmod_1c (rp, z, xsize, divisor, carry);

  free (z);
  return carry;
}

mp_limb_t
refmpn_divrem_1 (mp_ptr rp, mp_size_t xsize,
                 mp_srcptr sp, mp_size_t size, mp_limb_t divisor)
{
  return refmpn_divrem_1c (rp, xsize, sp, size, divisor, CNST_LIMB(0));
}

mp_limb_t
refmpn_preinv_divrem_1 (mp_ptr rp, mp_size_t xsize,
                        mp_srcptr sp, mp_size_t size,
                        mp_limb_t divisor, mp_limb_t inverse, unsigned shift)
{
  ASSERT (shift == refmpn_count_leading_zeros (divisor));
  ASSERT (inverse == refmpn_invert_limb (divisor << shift));

  return refmpn_divrem_1 (rp, xsize, sp, size, divisor);
}

mp_limb_t
refmpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
                 mp_ptr np, mp_size_t nn,
                 mp_srcptr dp)
{
  mp_ptr     tp;
  mp_limb_t  qh;

  tp = refmpn_malloc_limbs (nn + qxn);
  refmpn_zero (tp, qxn);
  refmpn_copyi (tp + qxn, np, nn);
  qh = refmpn_sb_div_qr (qp, tp, nn + qxn, dp, 2);
  refmpn_copyi (np, tp, 2);
  free (tp);
  return qh;
}

/* Inverse is floor((b*(b-d)-1) / d), per the "Division by Invariant
   Integers using Multiplication" paper, figure 8.1 m', where
   b=2^GMP_LIMB_BITS.  Note that -d-1 < d since d has the high bit set. */
mp_limb_t
refmpn_invert_limb (mp_limb_t d)
{
  mp_limb_t  r;
  ASSERT (d & GMP_LIMB_HIGHBIT);
  return refmpn_udiv_qrnnd (&r, -d-1, MP_LIMB_T_MAX, d);
}
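
/* Worked example (illustrative): for the smallest normalized divisor
   d = b/2 (only the high bit set),
     floor((b*(b-d)-1) / d) = floor((b*b/2 - 1) / (b/2)) = b - 1,
   so the inverse is MP_LIMB_T_MAX, the largest possible limb.  */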

void
refmpn_invert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr  qp, tp;
  TMP_DECL;
  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (2 * n);
  qp = TMP_ALLOC_LIMBS (n + 1);

  MPN_ZERO (tp, 2 * n);  mpn_sub_1 (tp, tp, 2 * n, 1);

  refmpn_tdiv_qr (qp, rp, 0, tp, 2 * n, up, n);
  refmpn_copyi (rp, qp, n);

  TMP_FREE;
}

void
refmpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch)
{
  mp_ptr     tp;
  mp_limb_t  binv;
  TMP_DECL;
  TMP_MARK;

  /* We use the library mpn_sbpi1_bdiv_q here, which isn't kosher in testing
     code.  To make up for it, we check that the inverse is correct using a
     multiplication below.  */

  tp = TMP_ALLOC_LIMBS (2 * n);

  MPN_ZERO (tp, n);
  tp[0] = 1;
  binvert_limb (binv, up[0]);
  mpn_sbpi1_bdiv_q (rp, tp, n, up, n, -binv);

  refmpn_mul_n (tp, rp, up, n);
  ASSERT_ALWAYS (tp[0] == 1 && mpn_zero_p (tp + 1, n - 1));

  TMP_FREE;
}

/* The aim is to produce a dst quotient and return a remainder c, satisfying
   c*b^n + src-i == 3*dst, where i is the incoming carry.

   Some value c==0, c==1 or c==2 will satisfy, so just try each.

   If GMP_NUMB_BITS is even then 2^GMP_NUMB_BITS==1mod3 and a non-zero
   remainder from the first division attempt determines the correct
   remainder (3-c), but don't bother with that, since we can't guarantee
   anything about GMP_NUMB_BITS when using nails.

   If the initial src-i produces a borrow then refmpn_sub_1 leaves a twos
   complement negative, ie. b^n+src-i, and the calculation produces c1
   satisfying c1*b^n + b^n+src-i == 3*dst, from which clearly c=c1+1.  This
   means it's enough to just add any borrow back at the end.

   A borrow only occurs when src==0 or src==1, and, by the same reasoning as
   in mpn/generic/diveby3.c, the c1 that results in those cases will only be
   0 or 1 respectively, so with 1 added the final return value is still in
   the prescribed range 0 to 2. */
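
/* Worked example (illustrative, assuming GMP_NUMB_BITS == 32, no nails):
   size == 1, sp[0] = 8, incoming carry i = 0.  8 is not a multiple of 3,
   but c == 1 gives 1*2^32 + 8 == 4294967304 == 3 * 1431655768, so the
   quotient limb is 1431655768 and the returned remainder is 1.  */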

mp_limb_t
refmpn_divexact_by3c (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_limb_t carry)
{
  mp_ptr     spcopy;
  mp_limb_t  c, cs;

  ASSERT (refmpn_overlap_fullonly_p (rp, sp, size));
  ASSERT (size >= 1);
  ASSERT (carry <= 2);
  ASSERT_MPN (sp, size);

  spcopy = refmpn_malloc_limbs (size);
  cs = refmpn_sub_1 (spcopy, sp, size, carry);

  for (c = 0; c <= 2; c++)
    if (refmpn_divmod_1c (rp, spcopy, size, CNST_LIMB(3), c) == 0)
      goto done;
  ASSERT_FAIL (no value of c satisfies);

 done:
  c += cs;
  ASSERT (c <= 2);

  free (spcopy);
  return c;
}

mp_limb_t
refmpn_divexact_by3 (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  return refmpn_divexact_by3c (rp, sp, size, CNST_LIMB(0));
}

/* The same as mpn/generic/mul_basecase.c, but using refmpn functions. */
void
refmpn_mul_basecase (mp_ptr prodp,
                     mp_srcptr up, mp_size_t usize,
                     mp_srcptr vp, mp_size_t vsize)
{
  mp_size_t  i;

  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= vsize);
  ASSERT (vsize >= 1);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  prodp[usize] = refmpn_mul_1 (prodp, up, usize, vp[0]);
  for (i = 1; i < vsize; i++)
    prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}

#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
#if WANT_FFT
#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
#else
#define FFT_THRESHOLD MP_SIZE_T_MAX /* don't use toom44 here */
#endif

void
refmpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr     tp;
  mp_size_t  tn;
  mp_limb_t  cy;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
         mul_basecase.  */
      refmpn_mul_basecase (wp, up, un, vp, vn);
      return;
    }

  if (vn < TOOM4_THRESHOLD)
    {
      /* In the mpn_toom33_mul range, use mpn_toom22_mul. */
      tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }
  else if (vn < FFT_THRESHOLD)
    {
      /* In the mpn_toom44_mul range, use mpn_toom33_mul. */
      tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom44_mul. */
      tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
      tp = refmpn_malloc_limbs (tn);
      mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      if (un - vn < vn)
        refmpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
        refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      MPN_COPY (wp, tp, vn);
      cy = refmpn_add (wp + vn, wp + vn, un, tp + vn, vn);
      ASSERT (cy == 0);
    }
  else
    MPN_COPY (wp, tp, 2 * vn);

  free (tp);
}

void
refmpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  refmpn_mul (prodp, up, size, vp, size);
}

void
refmpn_mullo_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_ptr  tp = refmpn_malloc_limbs (2*size);
  refmpn_mul (tp, up, size, vp, size);
  refmpn_copyi (prodp, tp, size);
  free (tp);
}

void
refmpn_sqr (mp_ptr dst, mp_srcptr src, mp_size_t size)
{
  refmpn_mul (dst, src, size, src, size);
}

/* Allowing usize<vsize, usize==0 or vsize==0. */
void
refmpn_mul_any (mp_ptr prodp,
                mp_srcptr up, mp_size_t usize,
                mp_srcptr vp, mp_size_t vsize)
{
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, up, usize));
  ASSERT (! refmpn_overlap_p (prodp, usize+vsize, vp, vsize));
  ASSERT (usize >= 0);
  ASSERT (vsize >= 0);
  ASSERT_MPN (up, usize);
  ASSERT_MPN (vp, vsize);

  if (usize == 0)
    {
      refmpn_fill (prodp, vsize, CNST_LIMB(0));
      return;
    }

  if (vsize == 0)
    {
      refmpn_fill (prodp, usize, CNST_LIMB(0));
      return;
    }

  if (usize >= vsize)
    refmpn_mul (prodp, up, usize, vp, vsize);
  else
    refmpn_mul (prodp, vp, vsize, up, usize);
}

mp_limb_t
refmpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y)
{
  mp_limb_t  x;
  int        twos;

  ASSERT (y != 0);
  ASSERT (! refmpn_zero_p (xp, xsize));
  ASSERT_MPN (xp, xsize);
  ASSERT_LIMB (y);

  x = refmpn_mod_1 (xp, xsize, y);
  if (x == 0)
    return y;

  twos = 0;
  while ((x & 1) == 0 && (y & 1) == 0)
    {
      x >>= 1;
      y >>= 1;
      twos++;
    }

  for (;;)
    {
      while ((x & 1) == 0)  x >>= 1;
      while ((y & 1) == 0)  y >>= 1;

      if (x < y)
        MP_LIMB_T_SWAP (x, y);

      x -= y;
      if (x == 0)
        break;
    }

  return y << twos;
}

/* Based on the full limb x, not nails. */
unsigned
refmpn_count_leading_zeros (mp_limb_t x)
{
  unsigned  n = 0;

  ASSERT (x != 0);

  while ((x & GMP_LIMB_HIGHBIT) == 0)
    {
      x <<= 1;
      n++;
    }
  return n;
}

/* Full limbs allowed, not limited to nails. */
unsigned
refmpn_count_trailing_zeros (mp_limb_t x)
{
  unsigned  n = 0;

  ASSERT (x != 0);

  while ((x & 1) == 0)
    {
      x >>= 1;
      n++;
    }
  return n;
}

/* Strip factors of two (low zero bits) from {p,size} by right shifting.
   The return value is the number of twos stripped.  */
mp_size_t
refmpn_strip_twos (mp_ptr p, mp_size_t size)
{
  mp_size_t  limbs;
  unsigned   shift;

  ASSERT (size >= 1);
  ASSERT (! refmpn_zero_p (p, size));
  ASSERT_MPN (p, size);

  for (limbs = 0; p[0] == 0; limbs++)
    {
      refmpn_copyi (p, p+1, size-1);
      p[size-1] = 0;
    }

  shift = refmpn_count_trailing_zeros (p[0]);
  if (shift)
    refmpn_rshift (p, p, size, shift);

  return limbs*GMP_NUMB_BITS + shift;
}
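
/* Worked example (illustrative): for {p,1} = {12}, 12 = binary 1100 has
   two low zero bits, so p becomes {3} and the return value is 2.  For two
   limbs {0,8} a whole zero limb plus three bits are stripped, returning
   GMP_NUMB_BITS + 3.  */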

mp_size_t
refmpn_gcd (mp_ptr gp, mp_ptr xp, mp_size_t xsize, mp_ptr yp, mp_size_t ysize)
{
  int  cmp;

  ASSERT (ysize >= 1);
  ASSERT (xsize >= ysize);
  ASSERT ((xp[0] & 1) != 0);
  ASSERT ((yp[0] & 1) != 0);
  /* ASSERT (xp[xsize-1] != 0); */  /* don't think x needs to be normalized */
  ASSERT (yp[ysize-1] != 0);
  ASSERT (refmpn_overlap_fullonly_p (gp, xp, xsize));
  ASSERT (refmpn_overlap_fullonly_p (gp, yp, ysize));
  ASSERT (! refmpn_overlap_p (xp, xsize, yp, ysize));
  if (xsize == ysize)
    ASSERT (refmpn_msbone (xp[xsize-1]) >= refmpn_msbone (yp[ysize-1]));
  ASSERT_MPN (xp, xsize);
  ASSERT_MPN (yp, ysize);

  refmpn_strip_twos (xp, xsize);
  MPN_NORMALIZE (xp, xsize);
  MPN_NORMALIZE (yp, ysize);

  for (;;)
    {
      cmp = refmpn_cmp_twosizes (xp, xsize, yp, ysize);
      if (cmp == 0)
        break;
      if (cmp < 0)
        MPN_PTR_SWAP (xp,xsize, yp,ysize);

      ASSERT_NOCARRY (refmpn_sub (xp, xp, xsize, yp, ysize));

      refmpn_strip_twos (xp, xsize);
      MPN_NORMALIZE (xp, xsize);
    }

  refmpn_copyi (gp, xp, xsize);
  return xsize;
}

unsigned long
ref_popc_limb (mp_limb_t src)
{
  unsigned long  count;
  int  i;

  count = 0;
  for (i = 0; i < GMP_LIMB_BITS; i++)
    {
      count += (src & 1);
      src >>= 1;
    }
  return count;
}

unsigned long
refmpn_popcount (mp_srcptr sp, mp_size_t size)
{
  unsigned long  count = 0;
  mp_size_t  i;

  ASSERT (size >= 0);
  ASSERT_MPN (sp, size);

  for (i = 0; i < size; i++)
    count += ref_popc_limb (sp[i]);
  return count;
}

unsigned long
refmpn_hamdist (mp_srcptr s1p, mp_srcptr s2p, mp_size_t size)
{
  mp_ptr  d;
  unsigned long  count;

  ASSERT (size >= 0);
  ASSERT_MPN (s1p, size);
  ASSERT_MPN (s2p, size);

  if (size == 0)
    return 0;

  d = refmpn_malloc_limbs (size);
  refmpn_xor_n (d, s1p, s2p, size);
  count = refmpn_popcount (d, size);
  free (d);
  return count;
}

/* Set r to a mod d, by plain shift-and-subtract on the 2-limb operands. */
void
refmpn_mod2 (mp_limb_t r[2], const mp_limb_t a[2], const mp_limb_t d[2])
{
  mp_limb_t  D[2];
  int        n;

  ASSERT (! refmpn_overlap_p (r, (mp_size_t) 2, d, (mp_size_t) 2));
  ASSERT_MPN (a, 2);
  ASSERT_MPN (d, 2);

  D[1] = d[1], D[0] = d[0];
  r[1] = a[1], r[0] = a[0];
  n = 0;

  for (;;)
    {
      if (D[1] & GMP_NUMB_HIGHBIT)
        break;
      if (refmpn_cmp (r, D, (mp_size_t) 2) <= 0)
        break;
      refmpn_lshift (D, D, (mp_size_t) 2, 1);
      n++;
      ASSERT (n <= GMP_NUMB_BITS);
    }

  while (n >= 0)
    {
      if (refmpn_cmp (r, D, (mp_size_t) 2) >= 0)
        ASSERT_NOCARRY (refmpn_sub_n (r, r, D, (mp_size_t) 2));
      refmpn_rshift (D, D, (mp_size_t) 2, 1);
      n--;
    }

  ASSERT (refmpn_cmp (r, d, (mp_size_t) 2) < 0);
}

/* Similar to the old mpn/generic/sb_divrem_mn.c, but somewhat simplified, in
   particular the trial quotient is allowed to be 2 too big. */
mp_limb_t
refmpn_sb_div_qr (mp_ptr qp,
                  mp_ptr np, mp_size_t nsize,
                  mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t  retval = 0;
  mp_size_t  i;
  mp_limb_t  d1 = dp[dsize-1];
  mp_ptr     np_orig = refmpn_memdup_limbs (np, nsize);

  ASSERT (nsize >= dsize);
  /* ASSERT (dsize > 2); */
  ASSERT (dsize >= 2);
  ASSERT (dp[dsize-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! refmpn_overlap_p (qp, nsize-dsize, np, nsize) || qp+dsize >= np);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);

  i = nsize-dsize;
  if (refmpn_cmp (np+i, dp, dsize) >= 0)
    {
      ASSERT_NOCARRY (refmpn_sub_n (np+i, np+i, dp, dsize));
      retval = 1;
    }

  for (i--; i >= 0; i--)
    {
      mp_limb_t  n0 = np[i+dsize];
      mp_limb_t  n1 = np[i+dsize-1];
      mp_limb_t  q, dummy_r;

      ASSERT (n0 <= d1);
      if (n0 == d1)
        q = GMP_NUMB_MAX;
      else
        q = refmpn_udiv_qrnnd (&dummy_r, n0, n1 << GMP_NAIL_BITS,
                               d1 << GMP_NAIL_BITS);

      n0 -= refmpn_submul_1 (np+i, dp, dsize, q);
      ASSERT (n0 == 0 || n0 == MP_LIMB_T_MAX);
      if (n0)
        {
          q--;
          if (! refmpn_add_n (np+i, np+i, dp, dsize))
            {
              q--;
              ASSERT_CARRY (refmpn_add_n (np+i, np+i, dp, dsize));
            }
        }

      qp[i] = q;
    }

  /* remainder < divisor */
#if 0 /* ASSERT triggers gcc 4.2.1 bug */
  ASSERT (refmpn_cmp (np, dp, dsize) < 0);
#endif

  /* multiply back to original */
  {
    mp_ptr  mp = refmpn_malloc_limbs (nsize);

    refmpn_mul_any (mp, qp, nsize-dsize, dp, dsize);
    if (retval)
      ASSERT_NOCARRY (refmpn_add_n (mp+nsize-dsize,mp+nsize-dsize, dp, dsize));
    ASSERT_NOCARRY (refmpn_add (mp, mp, nsize, np, dsize));
    ASSERT (refmpn_cmp (mp, np_orig, nsize) == 0);

    free (mp);
  }

  free (np_orig);
  return retval;
}

/* Divide {np,nsize} by {dp,dsize}, putting the quotient at
   {qp,nsize-dsize+1} and the remainder at {rp,dsize}.  A one-limb divisor
   goes through refmpn_divmod_1; otherwise the divisor is shifted up to set
   its high bit and refmpn_sb_div_qr does the work. */
void
refmpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
                mp_ptr np, mp_size_t nsize,
                mp_srcptr dp, mp_size_t dsize)
{
  ASSERT (qxn == 0);
  ASSERT_MPN (np, nsize);
  ASSERT_MPN (dp, dsize);
  ASSERT (dsize > 0);
  ASSERT (dp[dsize-1] != 0);

  if (dsize == 1)
    {
      rp[0] = refmpn_divmod_1 (qp, np, nsize, dp[0]);
      return;
    }

  {
    mp_ptr  n2p = refmpn_malloc_limbs (nsize+1);
    mp_ptr  d2p = refmpn_malloc_limbs (dsize);
    int     norm = refmpn_count_leading_zeros (dp[dsize-1]) - GMP_NAIL_BITS;

    n2p[nsize] = refmpn_lshift_or_copy (n2p, np, nsize, norm);
    ASSERT_NOCARRY (refmpn_lshift_or_copy (d2p, dp, dsize, norm));

    refmpn_sb_div_qr (qp, n2p, nsize+1, d2p, dsize);
    refmpn_rshift_or_copy (rp, n2p, dsize, norm);

    /* ASSERT (refmpn_zero_p (tp+dsize, nsize-dsize)); */
    free (n2p);
    free (d2p);
  }
}

void
refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
  mp_size_t  j;
  mp_limb_t  cy;

  ASSERT_MPN (up, 2*n);
  /* ASSERT about directed overlap rp, up */
  /* ASSERT about overlap rp, mp */
  /* ASSERT about overlap up, mp */

  for (j = n - 1; j >= 0; j--)
    {
      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
      up++;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  if (cy != 0)
    mpn_sub_n (rp, rp, mp, n);
}

size_t
refmpn_get_str (unsigned char *dst, int base, mp_ptr src, mp_size_t size)
{
  unsigned char  *d;
  size_t  dsize;

  ASSERT (size >= 0);
  ASSERT (base >= 2);
  ASSERT (base < numberof (mp_bases));
  ASSERT (size == 0 || src[size-1] != 0);
  ASSERT_MPN (src, size);

  MPN_SIZEINBASE (dsize, src, size, base);
  ASSERT (dsize >= 1);
  ASSERT (! byte_overlap_p (dst, (mp_size_t) dsize, src, size * BYTES_PER_MP_LIMB));

  if (size == 0)
    {
      dst[0] = 0;
      return 1;
    }

  /* don't clobber input for power of 2 bases */
  if (POW2_P (base))
    src = refmpn_memdup_limbs (src, size);

  d = dst + dsize;
  do
    {
      d--;
      ASSERT (d >= dst);
      *d = refmpn_divrem_1 (src, (mp_size_t) 0, src, size, (mp_limb_t) base);
      size -= (src[size-1] == 0);
    }
  while (size != 0);

  /* Move result back and decrement dsize if we didn't generate
     the maximum possible digits.  */
  if (d != dst)
    {
      size_t  i;
      dsize -= (d - dst);
      for (i = 0; i < dsize; i++)
        dst[i] = d[i];
    }

  if (POW2_P (base))
    free (src);

  return dsize;
}

mp_limb_t
ref_bswap_limb (mp_limb_t src)
{
  mp_limb_t  dst;
  int        i;

  dst = 0;
  for (i = 0; i < BYTES_PER_MP_LIMB; i++)
    {
      dst = (dst << 8) + (src & 0xFF);
      src >>= 8;
    }
  return dst;
}

/* These random functions are mostly for transitional purposes while adding
   nail support, since they're independent of the normal mpn routines.  They
   can probably be removed when those normal routines are reliable, though
   perhaps something independent would still be useful at times.  */

#if GMP_LIMB_BITS == 32
#define RAND_A  CNST_LIMB(0x29CF535)
#endif
#if GMP_LIMB_BITS == 64
#define RAND_A  CNST_LIMB(0xBAECD515DAF0B49D)
#endif

mp_limb_t  refmpn_random_seed;

mp_limb_t
refmpn_random_half (void)
{
  refmpn_random_seed = refmpn_random_seed * RAND_A + 1;
  return (refmpn_random_seed >> GMP_LIMB_BITS/2);
}

mp_limb_t
refmpn_random_limb (void)
{
  return ((refmpn_random_half () << (GMP_LIMB_BITS/2))
          | refmpn_random_half ()) & GMP_NUMB_MASK;
}

void
refmpn_random (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random (ptr, size);
      return;
    }

  for (i = 0; i < size; i++)
    ptr[i] = refmpn_random_limb ();
}

void
refmpn_random2 (mp_ptr ptr, mp_size_t size)
{
  mp_size_t  i;
  mp_limb_t  bit, mask, limb;
  int        run;

  if (GMP_NAIL_BITS == 0)
    {
      mpn_random2 (ptr, size);
      return;
    }

#define RUN_MODULUS  32

  /* start with ones at a random pos in the high limb */
  bit = CNST_LIMB(1) << (refmpn_random_half () % GMP_NUMB_BITS);
  mask = 0;
  run = 0;

  for (i = size-1; i >= 0; i--)
    {
      limb = 0;
      do
        {
          if (run == 0)
            {
              run = (refmpn_random_half () % RUN_MODULUS) + 1;
              mask = ~mask;
            }

          limb |= (bit & mask);
          bit >>= 1;
          run--;
        }
      while (bit != 0);

      ptr[i] = limb;
      bit = GMP_NUMB_HIGHBIT;
    }
}

/* This is a simple bitwise algorithm working high to low across "s" and
   testing each time whether setting the bit would make s^2 exceed n.  */
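
/* Worked example (illustrative): for n = 18 the bits of s are tried from
   high to low, and the algorithm ends with s = 4 and remainder r = 2,
   since 4^2 <= 18 < 5^2 and 18 - 16 == 2.  */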

mp_size_t
refmpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nsize)
{
  mp_ptr     tp, dp;
  mp_size_t  ssize, talloc, tsize, dsize, ret, ilimbs;
  unsigned   ibit;
  long       i;
  mp_limb_t  c;

  ASSERT (nsize >= 0);

  /* If n==0, then s=0 and r=0.  */
  if (nsize == 0)
    return 0;

  ASSERT (np[nsize - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nsize));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nsize + 1) / 2, rp, nsize));
  ASSERT (! MPN_OVERLAP_P (sp, (nsize + 1) / 2, np, nsize));

  /* root */
  ssize = (nsize+1)/2;
  refmpn_zero (sp, ssize);

  /* the remainder so far */
  dp = refmpn_memdup_limbs (np, nsize);
  dsize = nsize;

  /* temporary */
  talloc = 2*ssize + 1;
  tp = refmpn_malloc_limbs (talloc);

  for (i = GMP_NUMB_BITS * ssize - 1; i >= 0; i--)
    {
      /* t = 2*s*2^i + 2^(2*i), being the amount s^2 will increase by if 2^i
         is added to it */

      ilimbs = (i+1) / GMP_NUMB_BITS;
      ibit = (i+1) % GMP_NUMB_BITS;
      refmpn_zero (tp, ilimbs);
      c = refmpn_lshift_or_copy (tp+ilimbs, sp, ssize, ibit);
      tsize = ilimbs + ssize;
      tp[tsize] = c;
      tsize += (c != 0);

      ilimbs = (2*i) / GMP_NUMB_BITS;
      ibit = (2*i) % GMP_NUMB_BITS;
      if (ilimbs + 1 > tsize)
        {
          refmpn_zero_extend (tp, tsize, ilimbs + 1);
          tsize = ilimbs + 1;
        }
      c = refmpn_add_1 (tp+ilimbs, tp+ilimbs, tsize-ilimbs,
                        CNST_LIMB(1) << ibit);
      ASSERT (tsize < talloc);
      tp[tsize] = c;
      tsize += (c != 0);

      if (refmpn_cmp_twosizes (dp, dsize, tp, tsize) >= 0)
        {
          /* set this bit in s and subtract from the remainder */
          refmpn_setbit (sp, i);
          refmpn_zero_extend (tp, tsize, dsize);
          ASSERT_NOCARRY (refmpn_sub_n (dp, dp, tp, dsize));
          dsize = refmpn_normalize (dp, dsize);
        }
    }

  if (rp == NULL)
    ret = ! refmpn_zero_p (dp, dsize);
  else
    {
      ASSERT (dsize == 0 || dp[dsize-1] != 0);
      refmpn_copy (rp, dp, dsize);
      ret = dsize;
    }

  free (dp);
  free (tp);
  return ret;
}